Merge branch 'peugeot-tnaflix'

11 years ago · 88fc294f7f
parent 884ae74785 a232bb9551
commit 88fc294f7f
4 changed files with 96 additions and 44 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -86,7 +86,7 @@ from .ellentv import (
    EllenTVClipsIE,
 )
 from .elpais import ElPaisIE
-from .empflix import EmpflixIE
+from .empflix import EMPFlixIE
 from .engadget import EngadgetIE
 from .eporner import EpornerIE
 from .escapist import EscapistIE
@ -348,6 +348,7 @@ from .theplatform import ThePlatformIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
 from .tlc import TlcIE, TlcDeIE
 from .tnaflix import TNAFlixIE
 from .toutv import TouTvIE
 from .toypics import ToypicsUserIE, ToypicsIE
 from .traileraddict import TrailerAddictIE
--- a/youtube_dl/extractor/empflix.py
+++ b/youtube_dl/extractor/empflix.py
@ -1,58 +1,25 @@
 from __future__ import unicode_literals
-import re
+from .tnaflix import TNAFlixIE
 from .common import InfoExtractor
 from ..utils import fix_xml_ampersands
 class EMPFlixIE(TNAFlixIE):
    _VALID_URL = r'^https?://www\.empflix\.com/videos/(?P<display_id>[0-9a-zA-Z-]+)-(?P<id>[0-9]+)\.html'
    _TITLE_REGEX = r'name="title" value="(?P<title>[^"]*)"'
    _DESCRIPTION_REGEX = r'name="description" value="([^"]*)"'
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
 class EmpflixIE(InfoExtractor):
    _VALID_URL = r'^https?://www\.empflix\.com/videos/.*?-(?P<id>[0-9]+)\.html'
    _TEST = {
        'url': 'http://www.empflix.com/videos/Amateur-Finger-Fuck-33051.html',
        'md5': 'b1bc15b6412d33902d6e5952035fcabc',
        'info_dict': {
            'id': '33051',
            'display_id': 'Amateur-Finger-Fuck',
            'ext': 'mp4',
            'title': 'Amateur Finger Fuck',
            'description': 'Amateur solo finger fucking.',
            'thumbnail': 're:https?://.*\.jpg$',
            'age_limit': 18,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        age_limit = self._rta_search(webpage)
        video_title = self._html_search_regex(
            r'name="title" value="(?P<title>[^"]*)"', webpage, 'title')
        video_description = self._html_search_regex(
            r'name="description" value="([^"]*)"', webpage, 'description', fatal=False)
        cfg_url = self._html_search_regex(
            r'flashvars\.config = escape\("([^"]+)"',
            webpage, 'flashvars.config')
        cfg_xml = self._download_xml(
            cfg_url, video_id, note='Downloading metadata',
            transform_source=fix_xml_ampersands)
        formats = [
            {
                'url': item.find('videoLink').text,
                'format_id': item.find('res').text,
            } for item in cfg_xml.findall('./quality/item')
        ]
        thumbnail = cfg_xml.find('./startThumb').text
        return {
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'thumbnail': thumbnail,
            'formats': formats,
            'age_limit': age_limit,
        }
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dl/extractor/tnaflix.py
@ -0,0 +1,84 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    parse_duration,
    fix_xml_ampersands,
 )
 class TNAFlixIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tnaflix\.com/(?P<cat_id>[\w-]+)/(?P<display_id>[\w-]+)/video(?P<id>\d+)'
    _TITLE_REGEX = None
    _DESCRIPTION_REGEX = r'<h3 itemprop="description">([^<]+)</h3>'
    _CONFIG_REGEX = r'flashvars\.config\s*=\s*escape\("([^"]+)"'
    _TEST = {
        'url': 'http://www.tnaflix.com/porn-stars/Carmella-Decesare-striptease/video553878',
        'md5': 'ecf3498417d09216374fc5907f9c6ec0',
        'info_dict': {
            'id': '553878',
            'display_id': 'Carmella-Decesare-striptease',
            'ext': 'mp4',
            'title': 'Carmella Decesare - striptease',
            'description': '',
            'thumbnail': 're:https?://.*\.jpg$',
            'duration': 91,
            'age_limit': 18,
        }
    }
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id')
        webpage = self._download_webpage(url, display_id)
        title = self._html_search_regex(
            self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
        description = self._html_search_regex(
            self._DESCRIPTION_REGEX, webpage, 'description', fatal=False, default='')
        age_limit = self._rta_search(webpage)
        duration = self._html_search_meta('duration', webpage, 'duration', default=None)
        if duration:
            duration = parse_duration(duration[1:])
        cfg_url = self._html_search_regex(
            self._CONFIG_REGEX, webpage, 'flashvars.config')
        cfg_xml = self._download_xml(
            cfg_url, display_id, note='Downloading metadata',
            transform_source=fix_xml_ampersands)
        thumbnail = cfg_xml.find('./startThumb').text
        formats = []
        for item in cfg_xml.findall('./quality/item'):
            video_url = re.sub('speed=\d+', 'speed=', item.find('videoLink').text)
            format_id = item.find('res').text
            fmt = {
                'url': video_url,
                'format_id': format_id,
            }
            m = re.search(r'^(\d+)', format_id)
            if m:
                fmt['height'] = int(m.group(1))
            formats.append(fmt)
        self._sort_formats(formats)
        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -1336,7 +1336,7 @@ def parse_duration(s):
    s = s.strip()
    m = re.match(
-        r'(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
+        r'(?i)(?:(?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?(?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$', s)
    if not m:
        return None
    res = int(m.group('secs'))