Merge remote-tracking branch 'aajanki/wdr_live'

11 years ago · feccc3ff37
parent 265bfa2c79 b8988b63a6
commit feccc3ff37
3 changed files with 155 additions and 51 deletions
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@ -11,6 +11,7 @@ from .common import FileDownloader
 from .http import HttpFD
 from ..compat import (
    compat_urlparse,
    compat_urllib_error,
 )
 from ..utils import (
    struct_pack,
@ -121,7 +122,8 @@ class FlvReader(io.BytesIO):
        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
-        self.read(1)
+        flags = self.read_unsigned_char()
        live = flags & 0x20 != 0
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
@ -160,6 +162,7 @@ class FlvReader(io.BytesIO):
        return {
            'segments': segments,
            'fragments': fragments,
            'live': live,
        }
    def read_bootstrap_info(self):
@ -182,6 +185,10 @@ def build_fragments_list(boot_info):
    for segment, fragments_count in segment_run_table['segment_run']:
        for _ in range(fragments_count):
            res.append((segment, next(fragments_counter)))
    if boot_info['live']:
        res = res[-2:]
    return res
@ -246,6 +253,38 @@ class F4mFD(FileDownloader):
            self.report_error('Unsupported DRM')
        return media
    def _get_bootstrap_from_url(self, bootstrap_url):
        """Fetch the bootstrap data served at ``bootstrap_url`` and parse it.

        The raw bytes are read through ``self.ydl.urlopen`` and handed to
        ``read_bootstrap_info``, whose result is returned unchanged.
        """
        response = self.ydl.urlopen(bootstrap_url)
        raw_bootstrap = response.read()
        return read_bootstrap_info(raw_bootstrap)
    def _update_live_fragments(self, bootstrap_url, latest_fragment):
        """Poll the bootstrap URL for fragments newer than ``latest_fragment``.

        The bootstrap info is re-fetched up to 30 times, sleeping five
        seconds after every attempt that yields nothing new.  Entries are
        the ``(segment, fragment)`` tuples produced by
        ``build_fragments_list``; only those whose fragment number is
        greater than ``latest_fragment`` are kept.

        Returns the list of new entries.  When every attempt comes back
        empty, the failure is reported via ``self.report_error`` and an
        empty list is returned.
        """
        attempts_left = 30
        while attempts_left > 0:
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
            newer = [
                entry for entry in build_fragments_list(boot_info)
                if entry[1] > latest_fragment]
            if newer:
                return newer
            # Nothing new has been published yet: wait before polling again
            time.sleep(5.0)
            attempts_left -= 1
        self.report_error('Failed to update fragments')
        return []
    def _parse_bootstrap_node(self, node, base_url):
        """Obtain the bootstrap info described by a bootstrapInfo ``node``.

        When the node carries text, that text is base64-decoded and parsed
        directly, and no bootstrap URL is associated with it.  Otherwise the
        node's ``url`` attribute is resolved against ``base_url`` and the
        bootstrap data is downloaded from there.

        Returns a ``(boot_info, bootstrap_url)`` tuple; ``bootstrap_url`` is
        None for inline bootstrap data.
        """
        inline_payload = node.text
        if inline_payload is not None:
            decoded = base64.b64decode(inline_payload)
            return (read_bootstrap_info(decoded), None)
        bootstrap_url = compat_urlparse.urljoin(base_url, node.attrib['url'])
        return (self._get_bootstrap_from_url(bootstrap_url), bootstrap_url)
    def real_download(self, filename, info_dict):
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
@ -265,18 +304,13 @@ class F4mFD(FileDownloader):
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        if bootstrap_node.text is None:
+        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
-            bootstrap_url = compat_urlparse.urljoin(
+        live = boot_info['live']
                base_url, bootstrap_node.attrib['url'])
            bootstrap = self.ydl.urlopen(bootstrap_url).read()
        else:
            bootstrap = base64.b64decode(bootstrap_node.text)
        metadata_node = media.find(_add_ns('metadata'))
        if metadata_node is not None:
            metadata = base64.b64decode(metadata_node.text)
        else:
            metadata = None
        boot_info = read_bootstrap_info(bootstrap)
        fragments_list = build_fragments_list(boot_info)
        if self.params.get('test', False):
@ -301,7 +335,8 @@ class F4mFD(FileDownloader):
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
        write_flv_header(dest_stream)
-        write_metadata_tag(dest_stream, metadata)
+        if not live:
            write_metadata_tag(dest_stream, metadata)
        # This dict stores the download progress, it's updated by the progress
        # hook
@ -348,24 +383,45 @@ class F4mFD(FileDownloader):
        http_dl.add_progress_hook(frag_progress_hook)
        frags_filenames = []
-        for (seg_i, frag_i) in fragments_list:
+        while fragments_list:
            seg_i, frag_i = fragments_list.pop(0)
            name = 'Seg%d-Frag%d' % (seg_i, frag_i)
            url = base_url + name
            if akamai_pv:
                url += '?' + akamai_pv.strip(';')
            frag_filename = '%s-%s' % (tmpfilename, name)
-            success = http_dl.download(frag_filename, {'url': url})
+            try:
-            if not success:
+                success = http_dl.download(frag_filename, {'url': url})
-                return False
+                if not success:
-            with open(frag_filename, 'rb') as down:
+                    return False
-                down_data = down.read()
+                with open(frag_filename, 'rb') as down:
-                reader = FlvReader(down_data)
+                    down_data = down.read()
-                while True:
+                    reader = FlvReader(down_data)
-                    _, box_type, box_data = reader.read_box_info()
+                    while True:
-                    if box_type == b'mdat':
+                        _, box_type, box_data = reader.read_box_info()
-                        dest_stream.write(box_data)
+                        if box_type == b'mdat':
-                        break
+                            dest_stream.write(box_data)
-            frags_filenames.append(frag_filename)
+                            break
                if live:
                    os.remove(frag_filename)
                else:
                    frags_filenames.append(frag_filename)
            except (compat_urllib_error.HTTPError, ) as err:
                if live and (err.code == 404 or err.code == 410):
                    # We didn't keep up with the live window. Continue
                    # with the next available fragment.
                    msg = 'Fragment %d unavailable' % frag_i
                    self.report_warning(msg)
                    fragments_list = []
                else:
                    raise
            if not fragments_list and live and bootstrap_url:
                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
                total_frags += len(fragments_list)
                if fragments_list and (fragments_list[0][1] > frag_i + 1):
                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
                    self.report_warning(msg)
        dest_stream.close()
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@ -921,39 +921,57 @@ class InfoExtractor(object):
        formats = []
        rtmp_count = 0
-        for video in smil.findall('./body/switch/video'):
+        if smil.findall('./body/seq/video'):
-            src = video.get('src')
+            video = smil.findall('./body/seq/video')[0]
-            if not src:
+            fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count)
-                continue
+            formats.extend(fmts)
-            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+        else:
-            width = int_or_none(video.get('width'))
+            for video in smil.findall('./body/switch/video'):
-            height = int_or_none(video.get('height'))
+                fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count)
-            proto = video.get('proto')
+                formats.extend(fmts)
-            if not proto:
+
                if base:
                    if base.startswith('rtmp'):
                        proto = 'rtmp'
                    elif base.startswith('http'):
                        proto = 'http'
            ext = video.get('ext')
            if proto == 'm3u8':
                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
            elif proto == 'rtmp':
                rtmp_count += 1
                streamer = video.get('streamer') or base
                formats.append({
                    'url': streamer,
                    'play_path': src,
                    'ext': 'flv',
                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
                    'tbr': bitrate,
                    'width': width,
                    'height': height,
                })
        self._sort_formats(formats)
        return formats
    def _parse_smil_video(self, video, base, rtmp_count):
        src = video.get('src')
        if not src:
            return ([], rtmp_count)
        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
        width = int_or_none(video.get('width'))
        height = int_or_none(video.get('height'))
        proto = video.get('proto')
        if not proto:
            if base:
                if base.startswith('rtmp'):
                    proto = 'rtmp'
                elif base.startswith('http'):
                    proto = 'http'
        ext = video.get('ext')
        if proto == 'm3u8':
            return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
        elif proto == 'rtmp':
            rtmp_count += 1
            streamer = video.get('streamer') or base
            return ([{
                'url': streamer,
                'play_path': src,
                'ext': 'flv',
                'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
                'tbr': bitrate,
                'width': width,
                'height': height,
            }], rtmp_count)
        elif proto.startswith('http'):
            return ([{
                'url': base + src,
                'ext': ext or 'flv',
                'tbr': bitrate,
                'width': width,
                'height': height,
            }], rtmp_count)
    def _live_title(self, name):
        """ Generate the title for a live video """
        now = datetime.datetime.now()
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
                'title': 'Servicezeit',
                'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
                'upload_date': '20140310',
                'is_live': False
            },
            'params': {
                'skip_download': True,
@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
                'title': 'Marga Spiegel ist tot',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20140311',
                'is_live': False
            },
            'params': {
                'skip_download': True,
@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
                'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20091129',
                'is_live': False
            },
        },
        {
@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
                'title': 'Flavia Coelho: Amar é Amar',
                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                'upload_date': '20140717',
                'is_live': False
            },
        },
        {
@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
            'info_dict': {
                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
            }
        },
        {
            'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
            'info_dict': {
                'id': 'mdb-103364',
                'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
                'ext': 'flv',
                'upload_date': '20150212',
                'is_live': True
            },
            'params': {
                'skip_download': True,
            },
        }
    ]
@ -119,6 +137,10 @@ class WDRIE(InfoExtractor):
        video_url = flashvars['dslSrc'][0]
        title = flashvars['trackerClipTitle'][0]
        thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
        is_live = flashvars.get('isLive', ['0'])[0] == '1'
        if is_live:
            title = self._live_title(title)
        if 'trackerClipAirTime' in flashvars:
            upload_date = flashvars['trackerClipAirTime'][0]
@ -131,6 +153,13 @@ class WDRIE(InfoExtractor):
        if video_url.endswith('.f4m'):
            video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
            ext = 'flv'
        elif video_url.endswith('.smil'):
            fmt = self._extract_smil_formats(video_url, page_id)[0]
            video_url = fmt['url']
            sep = '&' if '?' in video_url else '?'
            video_url += sep
            video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
            ext = fmt['ext']
        else:
            ext = determine_ext(video_url)
@ -144,6 +173,7 @@ class WDRIE(InfoExtractor):
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'is_live': is_live
        }