|
|
@ -6,7 +6,10 @@ import re
|
|
|
|
from .common import InfoExtractor
|
|
|
|
from .common import InfoExtractor
|
|
|
|
|
|
|
|
|
|
|
|
from ..compat import compat_str
|
|
|
|
from ..compat import compat_str
|
|
|
|
from ..utils import int_or_none
|
|
|
|
from ..utils import (
|
|
|
|
|
|
|
|
int_or_none,
|
|
|
|
|
|
|
|
try_get,
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TEDIE(InfoExtractor):
|
|
|
|
class TEDIE(InfoExtractor):
|
|
|
@ -113,8 +116,9 @@ class TEDIE(InfoExtractor):
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_info(self, webpage):
|
|
|
|
def _extract_info(self, webpage):
|
|
|
|
info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>',
|
|
|
|
info_json = self._search_regex(
|
|
|
|
webpage, 'info json')
|
|
|
|
r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>',
|
|
|
|
|
|
|
|
webpage, 'info json')
|
|
|
|
return json.loads(info_json)
|
|
|
|
return json.loads(info_json)
|
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
def _real_extract(self, url):
|
|
|
@ -136,11 +140,16 @@ class TEDIE(InfoExtractor):
|
|
|
|
webpage = self._download_webpage(url, name,
|
|
|
|
webpage = self._download_webpage(url, name,
|
|
|
|
'Downloading playlist webpage')
|
|
|
|
'Downloading playlist webpage')
|
|
|
|
info = self._extract_info(webpage)
|
|
|
|
info = self._extract_info(webpage)
|
|
|
|
playlist_info = info['playlist']
|
|
|
|
|
|
|
|
|
|
|
|
playlist_info = try_get(
|
|
|
|
|
|
|
|
info, lambda x: x['__INITIAL_DATA__']['playlist'],
|
|
|
|
|
|
|
|
dict) or info['playlist']
|
|
|
|
|
|
|
|
|
|
|
|
playlist_entries = [
|
|
|
|
playlist_entries = [
|
|
|
|
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
|
|
|
self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
|
|
|
|
for talk in info['talks']
|
|
|
|
for talk in try_get(
|
|
|
|
|
|
|
|
info, lambda x: x['__INITIAL_DATA__']['talks'],
|
|
|
|
|
|
|
|
dict) or info['talks']
|
|
|
|
]
|
|
|
|
]
|
|
|
|
return self.playlist_result(
|
|
|
|
return self.playlist_result(
|
|
|
|
playlist_entries,
|
|
|
|
playlist_entries,
|
|
|
@ -149,9 +158,14 @@ class TEDIE(InfoExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
def _talk_info(self, url, video_name):
|
|
|
|
def _talk_info(self, url, video_name):
|
|
|
|
webpage = self._download_webpage(url, video_name)
|
|
|
|
webpage = self._download_webpage(url, video_name)
|
|
|
|
self.report_extraction(video_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
talk_info = self._extract_info(webpage)['talks'][0]
|
|
|
|
info = self._extract_info(webpage)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
talk_info = try_get(
|
|
|
|
|
|
|
|
info, lambda x: x['__INITIAL_DATA__']['talks'][0],
|
|
|
|
|
|
|
|
dict) or info['talks'][0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
title = talk_info['title'].strip()
|
|
|
|
|
|
|
|
|
|
|
|
external = talk_info.get('external')
|
|
|
|
external = talk_info.get('external')
|
|
|
|
if external:
|
|
|
|
if external:
|
|
|
@ -165,19 +179,27 @@ class TEDIE(InfoExtractor):
|
|
|
|
'url': ext_url or external['uri'],
|
|
|
|
'url': ext_url or external['uri'],
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
native_downloads = try_get(
|
|
|
|
|
|
|
|
talk_info, lambda x: x['downloads']['nativeDownloads'],
|
|
|
|
|
|
|
|
dict) or talk_info['nativeDownloads']
|
|
|
|
|
|
|
|
|
|
|
|
formats = [{
|
|
|
|
formats = [{
|
|
|
|
'url': format_url,
|
|
|
|
'url': format_url,
|
|
|
|
'format_id': format_id,
|
|
|
|
'format_id': format_id,
|
|
|
|
'format': format_id,
|
|
|
|
'format': format_id,
|
|
|
|
} for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None]
|
|
|
|
} for (format_id, format_url) in native_downloads.items() if format_url is not None]
|
|
|
|
if formats:
|
|
|
|
if formats:
|
|
|
|
for f in formats:
|
|
|
|
for f in formats:
|
|
|
|
finfo = self._NATIVE_FORMATS.get(f['format_id'])
|
|
|
|
finfo = self._NATIVE_FORMATS.get(f['format_id'])
|
|
|
|
if finfo:
|
|
|
|
if finfo:
|
|
|
|
f.update(finfo)
|
|
|
|
f.update(finfo)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
player_talk = talk_info['player_talks'][0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
resources_ = player_talk.get('resources') or talk_info.get('resources')
|
|
|
|
|
|
|
|
|
|
|
|
http_url = None
|
|
|
|
http_url = None
|
|
|
|
for format_id, resources in talk_info['resources'].items():
|
|
|
|
for format_id, resources in resources_.items():
|
|
|
|
if format_id == 'h264':
|
|
|
|
if format_id == 'h264':
|
|
|
|
for resource in resources:
|
|
|
|
for resource in resources:
|
|
|
|
h264_url = resource.get('file')
|
|
|
|
h264_url = resource.get('file')
|
|
|
@ -237,14 +259,11 @@ class TEDIE(InfoExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
video_id = compat_str(talk_info['id'])
|
|
|
|
video_id = compat_str(talk_info['id'])
|
|
|
|
|
|
|
|
|
|
|
|
thumbnail = talk_info['thumb']
|
|
|
|
|
|
|
|
if not thumbnail.startswith('http'):
|
|
|
|
|
|
|
|
thumbnail = 'http://' + thumbnail
|
|
|
|
|
|
|
|
return {
|
|
|
|
return {
|
|
|
|
'id': video_id,
|
|
|
|
'id': video_id,
|
|
|
|
'title': talk_info['title'].strip(),
|
|
|
|
'title': title,
|
|
|
|
'uploader': talk_info['speaker'],
|
|
|
|
'uploader': player_talk.get('speaker') or talk_info.get('speaker'),
|
|
|
|
'thumbnail': thumbnail,
|
|
|
|
'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),
|
|
|
|
'description': self._og_search_description(webpage),
|
|
|
|
'description': self._og_search_description(webpage),
|
|
|
|
'subtitles': self._get_subtitles(video_id, talk_info),
|
|
|
|
'subtitles': self._get_subtitles(video_id, talk_info),
|
|
|
|
'formats': formats,
|
|
|
|
'formats': formats,
|
|
|
|