Add support for tou.tv (Fixes #1792)

12 years ago · 5904088811
parent 69545c2aff
commit 5904088811
4 changed files with 106 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -133,6 +133,7 @@ from .techtalks import TechTalksIE
 from .ted import TEDIE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .toutv import TouTvIE
 from .traileraddict import TrailerAddictIE
 from .trilulilu import TriluliluIE
 from .tube8 import Tube8IE
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -350,6 +350,17 @@ class InfoExtractor(object):
        if secure: regexes = self._og_regexes('video:secure_url') + regexes
        return self._html_search_regex(regexes, html, name, **kargs)
    def _html_search_meta(self, name, html, display_name=None):
        if display_name is None:
            display_name = name
        return self._html_search_regex(
            r'''(?ix)<meta(?=[^>]+(?:name|property)=["\']%s["\'])
                    [^>]+content=["\']([^"\']+)["\']''' % re.escape(name),
            html, display_name, fatal=False)
    def _dc_search_uploader(self, html):
        return self._html_search_meta('dc.creator', html, 'uploader')
    def _rta_search(self, html):
        # See http://www.rtalabel.org/index.php?content=howtofaq#single
        if re.search(r'(?ix)<meta\s+name="rating"\s+'
@@ -358,6 +369,23 @@ class InfoExtractor(object):
            return 18
        return 0
    def _media_rating_search(self, html):
        # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
        rating = self._html_search_meta('rating', html)
        if not rating:
            return None
        RATING_TABLE = {
            'safe for kids': 0,
            'general': 8,
            '14 years': 14,
            'mature': 17,
            'restricted': 19,
        }
        return RATING_TABLE.get(rating.lower(), None)
 class SearchInfoExtractor(InfoExtractor):
    """
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@@ -0,0 +1,75 @@
 # coding: utf-8
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    unified_strdate,
 )
 class TouTvIE(InfoExtractor):
    """Information extractor for episode pages on www.tou.tv."""

    IE_NAME = u'tou.tv'
    _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))'
    _TEST = {
        u'url': u'http://www.tou.tv/30-vies/S04E41',
        u'file': u'30-vies_S04E41.mp4',
        u'info_dict': {
            u'title': u'30 vies Saison 4 / Épisode 41',
            u'description': u'md5:da363002db82ccbe4dafeb9cab039b09',
            u'age_limit': 8,
            u'uploader': u'Groupe des Nouveaux Médias',
            u'duration': 1296,
            u'upload_date': u'20131118',
            u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg',
        },
        u'params': {
            u'skip_download': True,  # Requires rtmpdump
        },
        # _real_extract raises when only the "Unavailable" placeholder stream
        # is offered, which it reports as a block outside of Canada.
        u'skip': 'Only available in Canada'
    }

    def _real_extract(self, url):
        """Extract the stream URL and metadata for a tou.tv episode page.

        Returns an info dictionary.  Raises ExtractorError when the stream
        list contains no usable entry, or when the only stream offered is
        the "Unavailable" placeholder.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        media_id = self._search_regex(
            r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')

        # TODO: test from de (presumably: from a German, i.e. non-Canadian, IP)
        streams_url = u'http://release.theplatform.com/content.select?pid=' + media_id
        streams_webpage = self._download_webpage(
            streams_url, video_id, note=u'Downloading stream list')
        streams_doc = xml.etree.ElementTree.fromstring(
            streams_webpage.encode('utf-8'))

        # Take the first entry that is neither empty nor an advertisement.
        # The default passed to next() keeps a bare StopIteration from
        # escaping when nothing qualifies.
        video_url = next(
            (node.text
             for node in streams_doc.findall('.//choice/url')
             if node.text and u'//ad.doubleclick' not in node.text),
            None)
        if video_url is None:
            raise ExtractorError(u'Unable to find a stream URL for this video')
        if video_url.endswith('/Unavailable.flv'):
            raise ExtractorError(
                u'Access to this video is blocked from outside of Canada',
                expected=True)

        # Both meta lookups may come back empty; the fields are then None.
        duration_str = self._html_search_meta(
            'video:duration', webpage, u'duration')
        duration = int(duration_str) if duration_str else None
        upload_date_str = self._html_search_meta(
            'video:release_date', webpage, u'upload date')
        upload_date = unified_strdate(upload_date_str) if upload_date_str else None

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': video_url,
            'description': self._og_search_description(webpage),
            'uploader': self._dc_search_uploader(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'age_limit': self._media_rating_search(webpage),
            'duration': duration,
            'upload_date': upload_date,
            'ext': 'mp4',
        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -734,6 +734,8 @@ def unified_strdate(date_str):
        '%Y/%m/%d %H:%M:%S',
        '%d.%m.%Y %H:%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
    ]
    for expression in format_expressions: