@ -22,8 +22,16 @@ from ..utils import (
class HttpFD ( FileDownloader ) :
class HttpFD ( FileDownloader ) :
def real_download ( self , filename , info_dict ) :
def real_download ( self , filename , info_dict ) :
url = info_dict [ ' url ' ]
url = info_dict [ ' url ' ]
tmpfilename = self . temp_name ( filename )
stream = None
class DownloadContext(dict):
    """Dictionary whose entries can also be used as attributes.

    Attribute reads, writes and deletes are forwarded to the matching
    dict operations, so ``ctx.name`` and ``ctx['name']`` address the
    same entry.
    """

    def __getattr__(self, name):
        # Only consulted when normal attribute lookup fails; a missing
        # key yields None instead of raising AttributeError.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Store the value as a dict entry rather than in an instance __dict__.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        # Removing an absent entry raises KeyError, exactly like ``del d[k]``.
        dict.__delitem__(self, name)
ctx = DownloadContext ( )
ctx . filename = filename
ctx . tmpfilename = self . temp_name ( filename )
ctx . stream = None
# Do not include the Accept-Encoding header
# Do not include the Accept-Encoding header
headers = { ' Youtubedl-no-compression ' : ' True ' }
headers = { ' Youtubedl-no-compression ' : ' True ' }
@ -38,46 +46,51 @@ class HttpFD(FileDownloader):
if is_test :
if is_test :
request . add_header ( ' Range ' , ' bytes=0- %s ' % str ( self . _TEST_FILE_SIZE - 1 ) )
request . add_header ( ' Range ' , ' bytes=0- %s ' % str ( self . _TEST_FILE_SIZE - 1 ) )
# Establish possible resume length
ctx . open_mode = ' wb '
if os . path . isfile ( encodeFilename ( tmpfilename ) ) :
ctx . resume_len = 0
resume_len = os . path . getsize ( encodeFilename ( tmpfilename ) )
else :
resume_len = 0
open_mode = ' wb '
if resume_len != 0 :
if self . params . get ( ' continuedl ' , True ) :
if self . params . get ( ' continuedl ' , True ) :
self . report_resuming_byte ( resume_len )
# Establish possible resume length
request . add_header ( ' Range ' , ' bytes= %d - ' % resume_len )
if os . path . isfile ( encodeFilename ( ctx . tmpfilename ) ) :
open_mode = ' ab '
ctx . resume_len = os . path . getsize ( encodeFilename ( ctx . tmpfilename ) )
else :
resume_len = 0
count = 0
count = 0
retries = self . params . get ( ' retries ' , 0 )
retries = self . params . get ( ' retries ' , 0 )
while count < = retries :
class SucceedDownload(Exception):
    """Control-flow signal: the file is already completely downloaded.

    Raised while establishing the connection and caught by the retry
    loop, which then reports success without downloading anything.
    """
class RetryDownload(Exception):
    """Control-flow signal: the current attempt failed and may be retried.

    ``source_error`` is the exception that caused the failure; the retry
    loop reads it back to report why a retry is happening.
    """

    def __init__(self, source_error):
        # Keep the triggering error so the handler can report it.
        self.source_error = source_error
def establish_connection ( ) :
if ctx . resume_len != 0 :
self . report_resuming_byte ( ctx . resume_len )
request . add_header ( ' Range ' , ' bytes= %d - ' % ctx . resume_len )
ctx . open_mode = ' ab '
# Establish connection
# Establish connection
try :
try :
data = self . ydl . urlopen ( request )
ctx. data = self . ydl . urlopen ( request )
# When trying to resume, Content-Range HTTP header of response has to be checked
# When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to webservers
# to match the value of requested Range HTTP header. This is due to webservers
# that don't support resuming and serve a whole file with no Content-Range
# that don't support resuming and serve a whole file with no Content-Range
# set in response despite the requested Range (see
# set in response despite the requested Range (see
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
# https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
if resume_len > 0 :
if ctx. resume_len > 0 :
content_range = data . headers . get ( ' Content-Range ' )
content_range = ctx. data. headers . get ( ' Content-Range ' )
if content_range :
if content_range :
content_range_m = re . search ( r ' bytes ( \ d+)- ' , content_range )
content_range_m = re . search ( r ' bytes ( \ d+)- ' , content_range )
# Content-Range is present and matches requested Range, resume is possible
# Content-Range is present and matches requested Range, resume is possible
if content_range_m and resume_len == int ( content_range_m . group ( 1 ) ) :
if content_range_m and ctx. resume_len == int ( content_range_m . group ( 1 ) ) :
break
return
# Content-Range is either not present or invalid. Assuming remote webserver is
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
# and performing entire redownload
self . report_unable_to_resume ( )
self . report_unable_to_resume ( )
resume_len = 0
ctx. resume_len = 0
open_mode = ' wb '
ctx. open_mode = ' wb '
break
return
except ( compat_urllib_error . HTTPError , ) as err :
except ( compat_urllib_error . HTTPError , ) as err :
if ( err . code < 500 or err . code > = 600 ) and err . code != 416 :
if ( err . code < 500 or err . code > = 600 ) and err . code != 416 :
# Unexpected HTTP error
# Unexpected HTTP error
@ -86,15 +99,15 @@ class HttpFD(FileDownloader):
# Unable to resume (requested range not satisfiable)
# Unable to resume (requested range not satisfiable)
try :
try :
# Open the connection again without the range header
# Open the connection again without the range header
data = self . ydl . urlopen ( basic_request )
ctx. data = self . ydl . urlopen ( basic_request )
content_length = data. info ( ) [ ' Content-Length ' ]
content_length = ctx. data. info ( ) [ ' Content-Length ' ]
except ( compat_urllib_error . HTTPError , ) as err :
except ( compat_urllib_error . HTTPError , ) as err :
if err . code < 500 or err . code > = 600 :
if err . code < 500 or err . code > = 600 :
raise
raise
else :
else :
# Examine the reported length
# Examine the reported length
if ( content_length is not None and
if ( content_length is not None and
( resume_len - 100 < int ( content_length ) < resume_len + 100 ) ) :
( ctx. resume_len - 100 < int ( content_length ) < ctx . resume_len + 100 ) ) :
# The file had already been fully downloaded.
# The file had already been fully downloaded.
# Explanation to the above condition: in issue #175 it was revealed that
# Explanation to the above condition: in issue #175 it was revealed that
# YouTube sometimes adds or removes a few bytes from the end of the file,
# YouTube sometimes adds or removes a few bytes from the end of the file,
@ -102,36 +115,30 @@ class HttpFD(FileDownloader):
# I decided to implement a suggested change and consider the file
# I decided to implement a suggested change and consider the file
# completely downloaded if the file size differs less than 100 bytes from
# completely downloaded if the file size differs less than 100 bytes from
# the one in the hard drive.
# the one in the hard drive.
self . report_file_already_downloaded ( filename)
self . report_file_already_downloaded ( ctx. filename)
self . try_rename ( tmpfilename, filename )
self . try_rename ( ctx. tmpfilename, ctx . filename )
self . _hook_progress ( {
self . _hook_progress ( {
' filename ' : filename,
' filename ' : ctx. filename,
' status ' : ' finished ' ,
' status ' : ' finished ' ,
' downloaded_bytes ' : resume_len,
' downloaded_bytes ' : ctx. resume_len,
' total_bytes ' : resume_len,
' total_bytes ' : ctx. resume_len,
} )
} )
r eturn True
r aise SucceedDownload ( )
else :
else :
# The length does not match, we start the download over
# The length does not match, we start the download over
self . report_unable_to_resume ( )
self . report_unable_to_resume ( )
resume_len = 0
ctx . resume_len = 0
open_mode = ' wb '
ctx . open_mode = ' wb '
break
return
except socket . error as e :
raise RetryDownload ( err )
if e . errno != errno . ECONNRESET :
except socket . error as err :
if err . errno != errno . ECONNRESET :
# Connection reset is no problem, just retry
# Connection reset is no problem, just retry
raise
raise
raise RetryDownload ( err )
# Retry
def download ( ) :
count + = 1
data_len = ctx . data . info ( ) . get ( ' Content-length ' , None )
if count < = retries :
self . report_retry ( count , retries )
if count > retries :
self . report_error ( ' giving up after %s retries ' % retries )
return False
data_len = data . info ( ) . get ( ' Content-length ' , None )
# Range HTTP header may be ignored/unsupported by a webserver
# Range HTTP header may be ignored/unsupported by a webserver
# (e.g. extractor/scivee.py, extractor/bambuser.py).
# (e.g. extractor/scivee.py, extractor/bambuser.py).
@ -142,7 +149,7 @@ class HttpFD(FileDownloader):
data_len = self . _TEST_FILE_SIZE
data_len = self . _TEST_FILE_SIZE
if data_len is not None :
if data_len is not None :
data_len = int ( data_len ) + resume_len
data_len = int ( data_len ) + ctx . resume_len
min_data_len = self . params . get ( ' min_filesize ' )
min_data_len = self . params . get ( ' min_filesize ' )
max_data_len = self . params . get ( ' max_filesize ' )
max_data_len = self . params . get ( ' max_filesize ' )
if min_data_len is not None and data_len < min_data_len :
if min_data_len is not None and data_len < min_data_len :
@ -152,17 +159,34 @@ class HttpFD(FileDownloader):
self . to_screen ( ' \r [download] File is larger than max-filesize ( %s bytes > %s bytes). Aborting. ' % ( data_len , max_data_len ) )
self . to_screen ( ' \r [download] File is larger than max-filesize ( %s bytes > %s bytes). Aborting. ' % ( data_len , max_data_len ) )
return False
return False
byte_counter = 0 + resume_len
byte_counter = 0 + ctx . resume_len
block_size = self . params . get ( ' buffersize ' , 1024 )
block_size = self . params . get ( ' buffersize ' , 1024 )
start = time . time ( )
start = time . time ( )
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
# measure time over whole while-loop, so slow_down() and best_block_size() work together properly
now = None # needed for slow_down() in the first loop run
now = None # needed for slow_down() in the first loop run
before = start # start measuring
before = start # start measuring
while True :
def retry(e):
    """Reset the download context for another attempt, then raise RetryDownload.

    Closes and forgets the output stream (if one was opened), records in
    ``ctx.resume_len`` how many bytes are already available so the next
    connection can request the remainder, and raises ``RetryDownload``
    wrapping ``e``.

    ``e`` is the error that interrupted the current attempt.
    """
    to_stdout = ctx.tmpfilename == '-'
    # The stream is opened lazily on the first data block, so a failure on
    # the very first read gets here with ctx.stream still None.
    if ctx.stream is not None:
        if not to_stdout:
            ctx.stream.close()
        ctx.stream = None
    if to_stdout:
        # '-' is not a file on disk; resume from what was already emitted.
        ctx.resume_len = byte_counter
    else:
        encoded_tmpfilename = encodeFilename(ctx.tmpfilename)
        # Nothing may have been written yet, in which case there is no
        # temporary file to measure and the download restarts from zero.
        if os.path.isfile(encoded_tmpfilename):
            ctx.resume_len = os.path.getsize(encoded_tmpfilename)
        else:
            ctx.resume_len = 0
    raise RetryDownload(e)
while True :
try :
# Download and write
# Download and write
data_block = data . read ( block_size if not is_test else min ( block_size , data_len - byte_counter ) )
data_block = ctx . data . read ( block_size if not is_test else min ( block_size , data_len - byte_counter ) )
# socket.timeout is a subclass of socket.error but may not have
# errno set
except socket . timeout as e :
retry ( e )
except socket . error as e :
if e . errno not in ( errno . ECONNRESET , errno . ETIMEDOUT ) :
raise
retry ( e )
byte_counter + = len ( data_block )
byte_counter + = len ( data_block )
# exit loop when download is finished
# exit loop when download is finished
@ -170,31 +194,32 @@ class HttpFD(FileDownloader):
break
break
# Open destination file just in time
# Open destination file just in time
if stream is None :
if ctx . stream is None :
try :
try :
( stream , tmpfilename ) = sanitize_open ( tmpfilename , open_mode )
ctx . stream , ctx . tmpfilename = sanitize_open (
assert stream is not None
ctx . tmpfilename , ctx . open_mode )
filename = self . undo_temp_name ( tmpfilename )
assert ctx . stream is not None
self . report_destination ( filename )
ctx . filename = self . undo_temp_name ( ctx . tmpfilename )
self . report_destination ( ctx . filename )
except ( OSError , IOError ) as err :
except ( OSError , IOError ) as err :
self . report_error ( ' unable to open for writing: %s ' % str ( err ) )
self . report_error ( ' unable to open for writing: %s ' % str ( err ) )
return False
return False
if self . params . get ( ' xattr_set_filesize ' , False ) and data_len is not None :
if self . params . get ( ' xattr_set_filesize ' , False ) and data_len is not None :
try :
try :
write_xattr ( tmpfilename , ' user.ytdl.filesize ' , str ( data_len ) . encode ( ' utf-8 ' ) )
write_xattr ( ctx . tmpfilename , ' user.ytdl.filesize ' , str ( data_len ) . encode ( ' utf-8 ' ) )
except ( XAttrUnavailableError , XAttrMetadataError ) as err :
except ( XAttrUnavailableError , XAttrMetadataError ) as err :
self . report_error ( ' unable to set filesize xattr: %s ' % str ( err ) )
self . report_error ( ' unable to set filesize xattr: %s ' % str ( err ) )
try :
try :
stream . write ( data_block )
ctx . stream . write ( data_block )
except ( IOError , OSError ) as err :
except ( IOError , OSError ) as err :
self . to_stderr ( ' \n ' )
self . to_stderr ( ' \n ' )
self . report_error ( ' unable to write data: %s ' % str ( err ) )
self . report_error ( ' unable to write data: %s ' % str ( err ) )
return False
return False
# Apply rate limit
# Apply rate limit
self . slow_down ( start , now , byte_counter - resume_len )
self . slow_down ( start , now , byte_counter - ctx . resume_len )
# end measuring of one loop run
# end measuring of one loop run
now = time . time ( )
now = time . time ( )
@ -207,18 +232,18 @@ class HttpFD(FileDownloader):
before = after
before = after
# Progress message
# Progress message
speed = self . calc_speed ( start , now , byte_counter - resume_len )
speed = self . calc_speed ( start , now , byte_counter - ctx . resume_len )
if data_len is None :
if data_len is None :
eta = None
eta = None
else :
else :
eta = self . calc_eta ( start , time . time ( ) , data_len - resume_len, byte_counter - resume_len )
eta = self . calc_eta ( start , time . time ( ) , data_len - ctx. resume_len, byte_counter - ctx . resume_len )
self . _hook_progress ( {
self . _hook_progress ( {
' status ' : ' downloading ' ,
' status ' : ' downloading ' ,
' downloaded_bytes ' : byte_counter ,
' downloaded_bytes ' : byte_counter ,
' total_bytes ' : data_len ,
' total_bytes ' : data_len ,
' tmpfilename ' : tmpfilename ,
' tmpfilename ' : ctx . tmpfilename ,
' filename ' : filename ,
' filename ' : ctx . filename ,
' eta ' : eta ,
' eta ' : eta ,
' speed ' : speed ,
' speed ' : speed ,
' elapsed ' : now - start ,
' elapsed ' : now - start ,
@ -227,27 +252,47 @@ class HttpFD(FileDownloader):
if is_test and byte_counter == data_len :
if is_test and byte_counter == data_len :
break
break
if stream is None :
if ctx . stream is None :
self . to_stderr ( ' \n ' )
self . to_stderr ( ' \n ' )
self . report_error ( ' Did not get any data blocks ' )
self . report_error ( ' Did not get any data blocks ' )
return False
return False
if tmpfilename != ' - ' :
if ctx . tmpfilename != ' - ' :
stream . close ( )
ctx . stream . close ( )
if data_len is not None and byte_counter != data_len :
if data_len is not None and byte_counter != data_len :
raise ContentTooShortError ( byte_counter , int ( data_len ) )
err = ContentTooShortError ( byte_counter , int ( data_len ) )
self . try_rename ( tmpfilename , filename )
if count < = retries :
retry ( err )
raise err
self . try_rename ( ctx . tmpfilename , ctx . filename )
# Update file modification time
# Update file modification time
if self . params . get ( ' updatetime ' , True ) :
if self . params . get ( ' updatetime ' , True ) :
info_dict [ ' filetime ' ] = self . try_utime ( filename , data . info ( ) . get ( ' last-modified ' , None ) )
info_dict [ ' filetime ' ] = self . try_utime ( ctx. filename, ctx . data . info ( ) . get ( ' last-modified ' , None ) )
self . _hook_progress ( {
self . _hook_progress ( {
' downloaded_bytes ' : byte_counter ,
' downloaded_bytes ' : byte_counter ,
' total_bytes ' : byte_counter ,
' total_bytes ' : byte_counter ,
' filename ' : filename ,
' filename ' : ctx . filename ,
' status ' : ' finished ' ,
' status ' : ' finished ' ,
' elapsed ' : time . time ( ) - start ,
' elapsed ' : time . time ( ) - start ,
} )
} )
return True
return True
while count < = retries :
try :
establish_connection ( )
download ( )
return True
except RetryDownload as e :
count + = 1
if count < = retries :
self . report_retry ( e . source_error , count , retries )
continue
except SucceedDownload :
return True
self . report_error ( ' giving up after %s retries ' % retries )
return False