Basic support for TED

13 years ago · 3a468f2d8b
parent 906417c7c5
commit 3a468f2d8b
2 changed files with 34 additions and 0 deletions
--- a/test/tests.json
+++ b/test/tests.json
@ -286,5 +286,14 @@
      "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ."
    }
  },
  {
    "name": "TED",
    "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html",
    "file": "102.mp4",
    "md5": "7bc087e71d16f18f9b8ab9fa62a8a031",
    "info_dict": {
        "title": "Dan Dennett: The illusion of consciousness"
    }
  }
 ]
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@ -3968,6 +3968,30 @@ class KeekIE(InfoExtractor):
        }
        return [info]
 class TEDIE(InfoExtractor):
    """Information extractor for talks published on www.ted.com.

    Handles URLs of the form ``http://www.ted.com/talks/<name>`` and builds
    the direct MP4 download URL from the talk details embedded in the page.
    """

    # Dots are escaped so that only the literal host "www.ted.com" matches
    # (an unescaped "." would accept any character in its place). Both the
    # http and https schemes are accepted.
    _VALID_URL = r'https?://www\.ted\.com/talks/(?P<videoName>\w+)'

    # If the url includes the language we get the title translated.
    _TITLE_RE = r'<h1><span id="altHeadline" >(?P<title>[\s\w:/\.\?=\+-]*)</span></h1>'

    # Compiled with re.VERBOSE: unescaped whitespace and newlines inside the
    # pattern are ignored, which is why literal spaces are backslash-escaped.
    _INFO_RE = r'''<script\ type="text/javascript">var\ talkDetails\ =.*?
                        "id":(?P<videoID>\d+).*?
                        "mediaSlug":"(?P<mediaSlug>\w+?)"'''

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for the talk at *url*.

        The dict contains the keys 'id', 'url', 'ext' and 'title'.

        Raises ValueError if *url* does not match ``_VALID_URL`` or if the
        title or the talk details cannot be located in the downloaded page.
        NOTE(review): if this module provides a dedicated extractor error
        type, raising that instead would be preferable -- confirm.
        """
        url_match = re.match(self._VALID_URL, url)
        if url_match is None:
            raise ValueError('TED: invalid URL: %s' % url)
        videoName = url_match.group('videoName')

        # The numeric talk id is only known after the page has been parsed,
        # so 0 is passed as a placeholder second argument
        # (presumably the id used for progress output -- verify).
        webpage = self._download_webpage(url, 0, 'Downloading "%s" page' % videoName)

        title_match = re.search(self._TITLE_RE, webpage)
        if title_match is None:
            raise ValueError('TED: unable to extract the title of "%s"' % videoName)
        title = title_match.group('title')

        info_match = re.search(self._INFO_RE, webpage, re.VERBOSE)
        if info_match is None:
            raise ValueError('TED: unable to extract the talk details of "%s"' % videoName)
        video_id = info_match.group('videoID')
        mediaSlug = info_match.group('mediaSlug')

        video_url = 'http://download.ted.com/talks/%s.mp4' % mediaSlug
        info = {
                'id': video_id,
                'url': video_url,
                'ext': 'mp4',
                'title': title,
        }
        return [info]
 def gen_extractors():
    """ Return a list of an instance of every supported extractor.
    The order does matter; the first extractor matched is the one handling the URL.
@ -4015,6 +4039,7 @@ def gen_extractors():
        RBMARadioIE(),
        EightTracksIE(),
        KeekIE(),
        TEDIE(),
        GenericIE()
    ]