Merge branch 'sexykarma' of https://github.com/CkuT/youtube-dl into CkuT-sexykarma

11 years ago · 77c3c5c5ed
parent 159444a668 7da224c907
commit 77c3c5c5ed
2 changed files with 78 additions and 0 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -315,6 +315,7 @@ from .sbs import SBSIE
 from .scivee import SciVeeIE
 from .screencast import ScreencastIE
 from .servingsys import ServingSysIE
 from .sexykarma import SexyKarmaIE
 from .shared import SharedIE
 from .sharesix import ShareSixIE
 from .sina import SinaIE
--- a/youtube_dl/extractor/sexykarma.py
+++ b/youtube_dl/extractor/sexykarma.py
@@ -0,0 +1,77 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 import re
 import datetime
 class SexyKarmaIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?sexykarma\.com/gonewild/video/.+\-(?P<id>[a-zA-Z0-9\-]+)(.html)'
    _TESTS = [{
        'url': 'http://www.sexykarma.com/gonewild/video/taking-a-quick-pee-yHI70cOyIHt.html',
        'md5': 'b9798e7d1ef1765116a8f516c8091dbd',
        'info_dict': {
            'id': 'yHI70cOyIHt',
            'ext': 'mp4',
            'title': 'Taking a quick pee.',
            'uploader_id': 'wildginger7',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': int,
            'view_count': int,
            'upload_date': '20141007',
        }
    }, {
        'url': 'http://www.sexykarma.com/gonewild/video/pot-pixie-tribute-8Id6EZPbuHf.html',
        'md5': 'dd216c68d29b49b12842b9babe762a5d',
        'info_dict': {
            'id': '8Id6EZPbuHf',
            'ext': 'mp4',
            'title': 'pot_pixie tribute',
            'uploader_id': 'banffite',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': int,
            'view_count': int,
            'upload_date': '20141013',
        }
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h2 class="he2"><span>(.*?)</span>', webpage, 'title')
        uploader_id = self._html_search_regex(r'class="aupa">\n*(.*?)</a>', webpage, 'uploader')
        url = self._html_search_regex(r'<p><a href="(.*?)" ?\n*target="_blank"><font color', webpage, 'url')
        thumbnail = self._html_search_regex(r'<div id="player" style="z-index:1;"> <span id="edge"></span> <span id="container"><img[\n ]*src="(.+?)"', webpage, 'thumbnail')
        str_duration = self._html_search_regex(r'<tr>[\n\s]*<td>Time: </td>[\n\s]*<td align="right"><span>(.+)\n*', webpage, 'duration')
        duration = self._to_seconds(str_duration)
        str_views = self._html_search_regex(r'<tr>[\n\s]*<td>Views: </td>[\n\s]*<td align="right"><span>(.+)</span>', webpage, 'view_count')
        view_count = int(str_views)
        # print view_count
        date = self._html_search_regex(r'class="aup">Added: <strong>(.*?)</strong>', webpage, 'date')
        d = datetime.datetime.strptime(date, '%B %d, %Y')
        upload_date = d.strftime('%Y%m%d')
        categories = re.findall(r'http://www.sexykarma.com/gonewild/search/video/(?:.+?)"><span>(.*?)</span>', webpage)
        return {
            'id': video_id,
            'title': title,
            'uploader_id': uploader_id,
            'url': url,
            'thumbnail': thumbnail,
            'duration': duration,
            'view_count': view_count,
            'upload_date': upload_date,
            'categories': categories,
        }
    def _to_seconds(self, timestr):
        seconds= 0
        for part in timestr.split(':'):
            seconds= seconds*60 + int(part)
        return seconds