[crackle] Add new extractor

10 years ago · 80f772c28a
parent f817d9bec1
commit 80f772c28a
3 changed files with 102 additions and 7 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -126,6 +126,7 @@ from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
 from .criterion import CriterionIE
 from .crooksandliars import CrooksAndLiarsIE
 from .crunchyroll import (
--- a/youtube_dl/extractor/comcarcoff.py
+++ b/youtube_dl/extractor/comcarcoff.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    int_or_none,
    parse_duration,
@@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor):
    _TESTS = [{
        'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
        'info_dict': {
-            'id': 'miranda-sings-happy-thanksgiving-miranda',
+            'id': '2494164',
            'ext': 'mp4',
            'upload_date': '20141127',
            'timestamp': 1417107600,
            'duration': 1232,
            'title': 'Happy Thanksgiving Miranda',
            'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
            'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg',
        },
        'params': {
            'skip_download': 'requires ffmpeg',
@@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor):
                r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
            display_id)['videoData']
-        video_id = full_data['activeVideo']['video']
+        display_id = full_data['activeVideo']['video']
-        video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id]
+        video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
        video_id = compat_str(video_data['mediaId'])
        thumbnails = [{
            'url': video_data['images']['thumb'],
        }, {
            'url': video_data['images']['poster'],
        }]
        formats = self._extract_m3u8_formats(
            video_data['mediaUrl'], video_id, ext='mp4')
        timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
            video_data.get('pubDate'))
@@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor):
            video_data.get('duration'))
        return {
            '_type': 'url_transparent',
            'url': 'crackle:%s' % video_id,
            'id': video_id,
            'display_id': display_id,
            'title': video_data['title'],
@@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor):
            'timestamp': timestamp,
            'duration': duration,
            'thumbnails': thumbnails,
-            'formats': formats,
+            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episode')),
            'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
        }
--- a/youtube_dl/extractor/crackle.py
+++ b/youtube_dl/extractor/crackle.py
@@ -0,0 +1,92 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import int_or_none
 class CrackleIE(InfoExtractor):
    """Information extractor for Crackle videos.

    Accepts either the internal ``crackle:<id>`` scheme or a crackle.com
    page URL ending in a numeric media id (see ``_VALID_URL``).  Metadata
    is read from the legacy "vidwallcache" XML endpoint, HLS formats are
    requested from uplynk, and progressive HTTP formats are built from the
    media path combined with the known file slots in ``_MEDIA_FILE_SLOTS``.
    """

    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.crackle.com/the-art-of-more/2496419',
        'info_dict': {
            'id': '2496419',
            'ext': 'mp4',
            'title': 'Heavy Lies the Head',
            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }
    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'
    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
    # Maps the file-name suffix appended to the media path to the
    # dimensions reported for that progressive HTTP format.
    _MEDIA_FILE_SLOTS = {
        'c544.flv': {
            'width': 544,
            'height': 306,
        },
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 852,
            'height': 478,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 478,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the Crackle media id contained in *url*.

        Returns a dict with ``id``, ``title``, ``description``,
        ``duration``, ``series``, ``season_number``, ``episode_number``,
        ``thumbnail``, ``subtitles`` and ``formats``.
        """
        video_id = self._match_id(url)
        item = self._download_xml(
            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
            video_id).find('i')
        title = item.attrib['t']

        thumbnail = None
        subtitles = {}
        # HLS is best-effort: a failed manifest download must not abort
        # extraction, since HTTP formats may still be available below.
        formats = self._extract_m3u8_formats(
            'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
            video_id, 'mp4', fatal=False)

        path = item.attrib.get('p')
        if path:
            thumbnail = self._THUMBNAIL_TEMPLATE % path
            http_base_url = 'http://ahttp.crackle.com/' + path
            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
                formats.append({
                    'url': http_base_url + mfs_path,
                    'format_id': mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
            for cc in item.findall('cc'):
                locale = cc.attrib.get('l')
                v = cc.attrib.get('v')
                if locale and v:
                    # Append rather than assign so that several caption
                    # entries for the same locale are all kept.
                    subtitles.setdefault(locale, []).append({
                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
                        'ext': 'ttml',
                    })
        self._sort_formats(formats, ('width', 'height', 'tbr'))

        # The 'r' attribute is parsed as a base-16 integer.  Duration is
        # optional metadata, so an unparsable value is dropped instead of
        # failing the whole extraction.
        duration = None
        duration_hex = item.attrib.get('r')
        if duration_hex:
            try:
                duration = int(duration_hex, 16)
            except ValueError:
                duration = None

        return {
            'id': video_id,
            'title': title,
            'description': item.attrib.get('d'),
            'duration': duration,
            'series': item.attrib.get('sn'),
            'season_number': int_or_none(item.attrib.get('se')),
            'episode_number': int_or_none(item.attrib.get('ep')),
            'thumbnail': thumbnail,
            'subtitles': subtitles,
            'formats': formats,
        }