Merge remote-tracking branch 'yasoob/master'

pull/2/head
Philipp Hagemeister 12 years ago
commit d746cd88c2

@ -686,5 +686,14 @@
"upload_date": "20130624", "upload_date": "20130624",
"uploader": "Hurts" "uploader": "Hurts"
} }
},
{
"name": "Tudou",
"url": "http://www.tudou.com/listplay/zzdE77v6Mmo/2xN2duXMxmw.html",
"file": "159447792.f4v",
"md5": "ad7c358a01541e926a1e413612c6b10a",
"info_dict": {
"title": "卡马乔国足开大脚长传冲吊集锦"
}
} }
] ]

@ -58,6 +58,7 @@ from .youku import YoukuIE
from .youporn import YouPornIE from .youporn import YouPornIE
from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE from .youtube import YoutubeIE, YoutubePlaylistIE, YoutubeSearchIE, YoutubeUserIE, YoutubeChannelIE
from .zdf import ZDFIE from .zdf import ZDFIE
from .tudou import TudouIE
def gen_extractors(): def gen_extractors():
""" Return a list of an instance of every supported extractor. """ Return a list of an instance of every supported extractor.
@ -129,6 +130,7 @@ def gen_extractors():
BreakIE(), BreakIE(),
VevoIE(), VevoIE(),
JukeboxIE(), JukeboxIE(),
TudouIE(),
GenericIE() GenericIE()
] ]

@ -0,0 +1,32 @@
import re
from .common import InfoExtractor
class TudouIE(InfoExtractor):
    """Information extractor for tudou.com ``listplay`` / ``programs`` pages."""

    # Group 2 is the last path component of the URL.  The pattern is not
    # anchored at the end, so the first alternative ``[^/]+`` already
    # consumes a trailing ``.html``; the suffix is stripped in
    # ``_real_extract`` instead of relying on group 3.
    _VALID_URL = r'(?:http://)?(?:www\.)?tudou\.com/(?:listplay|programs)/(?:view|(.+?))/(?:([^/]+)|([^/]+)\.html)'

    def _real_extract(self, url):
        """Return the download information for the video at *url*.

        Two pages are fetched: the video page itself (for the numeric id,
        the title and the thumbnail) and ``http://v2.tudou.com/f?id=<id>``
        (for the media URL, read from the text of its ``<f>`` element).

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title`` and ``thumbnail``.

        Raises ``AttributeError`` when *url* does not match ``_VALID_URL`` or
        when one of the expected patterns is missing from a downloaded page
        (the failed ``re`` call yields ``None``).
        """
        mobj = re.match(self._VALID_URL, url)
        # Provisional id taken from the URL; only used to label the first
        # download, it is replaced by the id found in the page below.
        display_id = mobj.group(2).replace('.html', '')
        webpage = self._download_webpage(url, display_id)

        # The id used by the info service is the value of the "k" key
        # embedded in the page.
        video_id = re.search(r'"k":(.+?),', webpage).group(1)

        # Title and thumbnail each appear in one of two quoting styles; try
        # them in the original order (match objects are always truthy, so
        # ``or`` falls through only when the first search found nothing).
        title_match = (re.search(r',kw:"(.+)"', webpage) or
                       re.search(r",kw: '(.+)'", webpage))
        title = title_match.group(1)

        thumbnail_match = (re.search(r",pic: '(.+?)'", webpage) or
                           re.search(r',pic:"(.+?)"', webpage))
        thumbnail_url = thumbnail_match.group(1)

        info_url = "http://v2.tudou.com/f?id=" + str(video_id)
        info_page = self._download_webpage(
            info_url, video_id, "Opening the info webpage")
        # Raw pattern without the former invalid string escapes
        # (``\>``, ``\<``, ``\/``); it matches exactly the same text.
        final_url = re.search(r'>(.+?)</f>', info_page).group(1)

        # File extension: whatever follows the last dot of the URL once the
        # query string has been dropped.
        ext = final_url.split('?')[0].split('.')[-1]

        return [{
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
        }]
Loading…
Cancel
Save