#55 Add aria2c support for DASH (mpd) and HLS (m3u8)

Co-authored-by: Dan <2660574+shirtjs@users.noreply.github.com>
Co-authored-by: pukkandan <pukkandan@gmail.com>
pull/310/head
shirt-dev 4 years ago committed by GitHub
parent ff84930c86
commit 5219cb3e75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,11 +1,24 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from ..utils import (
determine_protocol,
)
def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
info_copy = info_dict.copy()
if protocol:
info_copy['protocol'] = protocol
return get_suitable_downloader(info_copy, *args, **kwargs)
# Some of these require _get_real_downloader
from .common import FileDownloader from .common import FileDownloader
from .dash import DashSegmentsFD
from .f4m import F4mFD from .f4m import F4mFD
from .hls import HlsFD from .hls import HlsFD
from .http import HttpFD from .http import HttpFD
from .rtmp import RtmpFD from .rtmp import RtmpFD
from .dash import DashSegmentsFD
from .rtsp import RtspFD from .rtsp import RtspFD
from .ism import IsmFD from .ism import IsmFD
from .youtube_live_chat import YoutubeLiveChatReplayFD from .youtube_live_chat import YoutubeLiveChatReplayFD
@ -14,10 +27,6 @@ from .external import (
FFmpegFD, FFmpegFD,
) )
from ..utils import (
determine_protocol,
)
PROTOCOL_MAP = { PROTOCOL_MAP = {
'rtmp': RtmpFD, 'rtmp': RtmpFD,
'm3u8_native': HlsFD, 'm3u8_native': HlsFD,
@ -31,7 +40,7 @@ PROTOCOL_MAP = {
} }
def get_suitable_downloader(info_dict, params={}): def get_suitable_downloader(info_dict, params={}, default=HttpFD):
"""Get the downloader class that can handle the info dict.""" """Get the downloader class that can handle the info dict."""
protocol = determine_protocol(info_dict) protocol = determine_protocol(info_dict)
info_dict['protocol'] = protocol info_dict['protocol'] = protocol
@ -45,16 +54,17 @@ def get_suitable_downloader(info_dict, params={}):
if ed.can_download(info_dict): if ed.can_download(info_dict):
return ed return ed
if protocol.startswith('m3u8') and info_dict.get('is_live'): if protocol.startswith('m3u8'):
return FFmpegFD if info_dict.get('is_live'):
return FFmpegFD
if protocol == 'm3u8' and params.get('hls_prefer_native') is True: elif _get_real_downloader(info_dict, 'frag_urls', params, None):
return HlsFD return HlsFD
elif params.get('hls_prefer_native') is True:
if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False: return HlsFD
return FFmpegFD elif params.get('hls_prefer_native') is False:
return FFmpegFD
return PROTOCOL_MAP.get(protocol, HttpFD) return PROTOCOL_MAP.get(protocol, default)
__all__ = [ __all__ = [

@ -1,6 +1,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from ..downloader import _get_real_downloader
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import compat_urllib_error from ..compat import compat_urllib_error
from ..utils import ( from ..utils import (
DownloadError, DownloadError,
@ -20,31 +22,42 @@ class DashSegmentsFD(FragmentFD):
fragments = info_dict['fragments'][:1] if self.params.get( fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments'] 'test', False) else info_dict['fragments']
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
ctx = { ctx = {
'filename': filename, 'filename': filename,
'total_frags': len(fragments), 'total_frags': len(fragments),
} }
self._prepare_and_start_frag_download(ctx) if real_downloader:
self._prepare_external_frag_download(ctx)
else:
self._prepare_and_start_frag_download(ctx)
fragment_retries = self.params.get('fragment_retries', 0) fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
fragment_urls = []
frag_index = 0 frag_index = 0
for i, fragment in enumerate(fragments): for i, fragment in enumerate(fragments):
frag_index += 1 frag_index += 1
if frag_index <= ctx['fragment_index']: if frag_index <= ctx['fragment_index']:
continue continue
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
if real_downloader:
fragment_urls.append(fragment_url)
continue
# In DASH, the first segment contains necessary headers to # In DASH, the first segment contains necessary headers to
# generate a valid MP4 file, so always abort for the first segment # generate a valid MP4 file, so always abort for the first segment
fatal = i == 0 or not skip_unavailable_fragments fatal = i == 0 or not skip_unavailable_fragments
count = 0 count = 0
while count <= fragment_retries: while count <= fragment_retries:
try: try:
fragment_url = fragment.get('url')
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success: if not success:
return False return False
@ -75,6 +88,16 @@ class DashSegmentsFD(FragmentFD):
self.report_error('giving up after %s fragment retries' % fragment_retries) self.report_error('giving up after %s fragment retries' % fragment_retries)
return False return False
self._finish_frag_download(ctx) if real_downloader:
info_copy = info_dict.copy()
info_copy['url_list'] = fragment_urls
fd = real_downloader(self.ydl, self.params)
# TODO: Make progress updates work without hooking twice
# for ph in self._progress_hooks:
# fd.add_progress_hook(ph)
success = fd.real_download(filename, info_copy)
if not success:
return False
else:
self._finish_frag_download(ctx)
return True return True

@ -5,6 +5,13 @@ import re
import subprocess import subprocess
import sys import sys
import time import time
import shutil
try:
from Crypto.Cipher import AES
can_decrypt_frag = True
except ImportError:
can_decrypt_frag = False
from .common import FileDownloader from .common import FileDownloader
from ..compat import ( from ..compat import (
@ -18,15 +25,19 @@ from ..utils import (
cli_bool_option, cli_bool_option,
cli_configuration_args, cli_configuration_args,
encodeFilename, encodeFilename,
error_to_compat_str,
encodeArgument, encodeArgument,
handle_youtubedl_headers, handle_youtubedl_headers,
check_executable, check_executable,
is_outdated_version, is_outdated_version,
process_communicate_or_kill, process_communicate_or_kill,
sanitized_Request,
) )
class ExternalFD(FileDownloader): class ExternalFD(FileDownloader):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
@ -79,7 +90,7 @@ class ExternalFD(FileDownloader):
@classmethod @classmethod
def supports(cls, info_dict): def supports(cls, info_dict):
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') return info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS
@classmethod @classmethod
def can_download(cls, info_dict): def can_download(cls, info_dict):
@ -109,8 +120,47 @@ class ExternalFD(FileDownloader):
_, stderr = process_communicate_or_kill(p) _, stderr = process_communicate_or_kill(p)
if p.returncode != 0: if p.returncode != 0:
self.to_stderr(stderr.decode('utf-8', 'replace')) self.to_stderr(stderr.decode('utf-8', 'replace'))
if 'url_list' in info_dict:
file_list = []
for [i, url] in enumerate(info_dict['url_list']):
tmpsegmentname = '%s_%s.frag' % (tmpfilename, i)
file_list.append(tmpsegmentname)
with open(tmpfilename, 'wb') as dest:
for i in file_list:
if 'decrypt_info' in info_dict:
decrypt_info = info_dict['decrypt_info']
with open(i, 'rb') as src:
if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV')
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
encrypted_data = src.read()
decrypted_data = AES.new(
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(encrypted_data)
dest.write(decrypted_data)
else:
shutil.copyfileobj(open(i, 'rb'), dest)
else:
shutil.copyfileobj(open(i, 'rb'), dest)
if not self.params.get('keep_fragments', False):
for file_path in file_list:
try:
os.remove(file_path)
except OSError as ose:
self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
try:
file_path = '%s.frag.urls' % tmpfilename
os.remove(file_path)
except OSError as ose:
self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
return p.returncode return p.returncode
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url
class CurlFD(ExternalFD): class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V' AVAILABLE_OPT = '-V'
@ -186,15 +236,17 @@ class WgetFD(ExternalFD):
class Aria2cFD(ExternalFD): class Aria2cFD(ExternalFD):
AVAILABLE_OPT = '-v' AVAILABLE_OPT = '-v'
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c'] cmd = [self.exe, '-c']
cmd += self._configuration_args([
'--min-split-size', '1M', '--max-connection-per-server', '4'])
dn = os.path.dirname(tmpfilename) dn = os.path.dirname(tmpfilename)
if 'url_list' not in info_dict:
cmd += ['--out', os.path.basename(tmpfilename)]
verbose_level_args = ['--console-log-level=warn', '--summary-interval=0']
cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args)
if dn: if dn:
cmd += ['--dir', dn] cmd += ['--dir', dn]
cmd += ['--out', os.path.basename(tmpfilename)]
if info_dict.get('http_headers') is not None: if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)] cmd += ['--header', '%s: %s' % (key, val)]
@ -202,7 +254,21 @@ class Aria2cFD(ExternalFD):
cmd += self._option('--all-proxy', 'proxy') cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
cmd += ['--', info_dict['url']] cmd += ['--auto-file-renaming=false']
if 'url_list' in info_dict:
cmd += verbose_level_args
cmd += ['--uri-selector', 'inorder', '--download-result=hide']
url_list_file = '%s.frag.urls' % tmpfilename
url_list = []
for [i, url] in enumerate(info_dict['url_list']):
tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i)
url_list.append('%s\n\tout=%s' % (url, tmpsegmentname))
with open(url_list_file, 'w') as f:
f.write('\n'.join(url_list))
cmd += ['-i', url_list_file]
else:
cmd += ['--', info_dict['url']]
return cmd return cmd
@ -221,9 +287,7 @@ class HttpieFD(ExternalFD):
class FFmpegFD(ExternalFD): class FFmpegFD(ExternalFD):
@classmethod SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
def supports(cls, info_dict):
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
@classmethod @classmethod
def available(cls): def available(cls):

@ -277,3 +277,24 @@ class FragmentFD(FileDownloader):
'status': 'finished', 'status': 'finished',
'elapsed': elapsed, 'elapsed': elapsed,
}) })
def _prepare_external_frag_download(self, ctx):
if 'live' not in ctx:
ctx['live'] = False
if not ctx['live']:
total_frags_str = '%d' % ctx['total_frags']
ad_frags = ctx.get('ad_frags', 0)
if ad_frags:
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
self.to_screen(
'[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
tmpfilename = self.temp_name(ctx['filename'])
# Should be initialized before ytdl file check
ctx.update({
'tmpfilename': tmpfilename,
'fragment_index': 0,
})

@ -8,6 +8,7 @@ try:
except ImportError: except ImportError:
can_decrypt_frag = False can_decrypt_frag = False
from ..downloader import _get_real_downloader
from .fragment import FragmentFD from .fragment import FragmentFD
from .external import FFmpegFD from .external import FFmpegFD
@ -73,10 +74,13 @@ class HlsFD(FragmentFD):
'hlsnative has detected features it does not support, ' 'hlsnative has detected features it does not support, '
'extraction will be delegated to ffmpeg') 'extraction will be delegated to ffmpeg')
fd = FFmpegFD(self.ydl, self.params) fd = FFmpegFD(self.ydl, self.params)
for ph in self._progress_hooks: # TODO: Make progress updates work without hooking twice
fd.add_progress_hook(ph) # for ph in self._progress_hooks:
# fd.add_progress_hook(ph)
return fd.real_download(filename, info_dict) return fd.real_download(filename, info_dict)
real_downloader = _get_real_downloader(info_dict, 'frag_urls', self.params, None)
def is_ad_fragment_start(s): def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
@ -85,6 +89,8 @@ class HlsFD(FragmentFD):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
fragment_urls = []
media_frags = 0 media_frags = 0
ad_frags = 0 ad_frags = 0
ad_frag_next = False ad_frag_next = False
@ -109,7 +115,10 @@ class HlsFD(FragmentFD):
'ad_frags': ad_frags, 'ad_frags': ad_frags,
} }
self._prepare_and_start_frag_download(ctx) if real_downloader:
self._prepare_external_frag_download(ctx)
else:
self._prepare_and_start_frag_download(ctx)
fragment_retries = self.params.get('fragment_retries', 0) fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
@ -140,6 +149,11 @@ class HlsFD(FragmentFD):
else compat_urlparse.urljoin(man_url, line)) else compat_urlparse.urljoin(man_url, line))
if extra_query: if extra_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_query)
if real_downloader:
fragment_urls.append(frag_url)
continue
count = 0 count = 0
headers = info_dict.get('http_headers', {}) headers = info_dict.get('http_headers', {})
if byte_range: if byte_range:
@ -168,6 +182,7 @@ class HlsFD(FragmentFD):
self.report_error( self.report_error(
'giving up after %s fragment retries' % fragment_retries) 'giving up after %s fragment retries' % fragment_retries)
return False return False
if decrypt_info['METHOD'] == 'AES-128': if decrypt_info['METHOD'] == 'AES-128':
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
@ -211,6 +226,17 @@ class HlsFD(FragmentFD):
elif is_ad_fragment_end(line): elif is_ad_fragment_end(line):
ad_frag_next = False ad_frag_next = False
self._finish_frag_download(ctx) if real_downloader:
info_copy = info_dict.copy()
info_copy['url_list'] = fragment_urls
info_copy['decrypt_info'] = decrypt_info
fd = real_downloader(self.ydl, self.params)
# TODO: Make progress updates work without hooking twice
# for ph in self._progress_hooks:
# fd.add_progress_hook(ph)
success = fd.real_download(filename, info_copy)
if not success:
return False
else:
self._finish_frag_download(ctx)
return True return True

Loading…
Cancel
Save