# Recovered from git patch 3a468f2d8b0261d4f45a7c5837f54edc33acdd8f
# ("[PATCH] Basic support for TED", Sun, 17 Feb 2013 17:13:06 +0100), which
# adds this extractor to youtube_dl/InfoExtractors.py after KeekIE.
#
# The same patch also:
#   * registers TEDIE() in gen_extractors(), between KeekIE() and GenericIE();
#   * appends this entry to test/tests.json:
#       {
#         "name": "TED",
#         "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
#         "file": "102.mp4",
#         "md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
#         "info_dict": {
#           "title": "Dan Dennett: The illusion of consciousness"
#         }
#       }


class TEDIE(InfoExtractor):
    """Information extractor for talks hosted on www.ted.com.

    The talk page is downloaded and scraped for two things: the headline
    (used as the title) and the ``talkDetails`` JavaScript assignment, from
    which the numeric talk id and the media slug are read.  The download URL
    is derived from the media slug.
    """

    # Host-name dots are escaped so that only the literal host matches.
    _VALID_URL = r'http://www\.ted\.com/talks/(?P<videoName>\w+)'

    # NOTE(review): the opening tags of this pattern were stripped from the
    # patch text it was recovered from; only the closing '</span></h1>' and
    # the 'title' group survived.  The opening '<h1 ...><span ...>' is
    # therefore written attribute-tolerant -- confirm against the page markup.
    _TITLE_RE = (r'<h1[^>]*>\s*<span[^>]*>'
                 r'(?P<title>[\s\w:/\.\?=\+-]*)</span></h1>')

    # Compiled with re.VERBOSE: unescaped whitespace (including the line
    # breaks below) is ignored, and '\ ' stands for a literal space.
    _INFO_RE = r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
                   "id":(?P<videoID>[\d]+).*?
                   "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for the talk.

        The dict carries the keys 'id', 'url', 'ext' and 'title'.

        Raises ValueError when *url* does not match _VALID_URL, or when the
        downloaded page lacks the headline or the talkDetails data, instead
        of failing with an AttributeError on a missing regex match.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ValueError('Invalid TED URL: %s' % url)
        video_name = url_match.group('videoName')

        webpage = self._download_webpage(
            url, 0, 'Downloading \"%s\" page' % video_name)

        # If the URL includes the language, the page (and therefore the
        # title) is the translated one.
        title_match = re.search(self._TITLE_RE, webpage)
        if title_match is None:
            raise ValueError(
                'Unable to extract the title of TED talk "%s"' % video_name)

        info_match = re.search(self._INFO_RE, webpage, re.VERBOSE)
        if info_match is None:
            raise ValueError(
                'Unable to extract talkDetails of TED talk "%s"' % video_name)

        media_slug = info_match.group('mediaSlug')
        info = {
            'id': info_match.group('videoID'),
            'url': 'http://download.ted.com/talks/%s.mp4' % media_slug,
            'ext': 'mp4',
            'title': title_match.group('title'),
        }
        return [info]