[webofstories] Tolerate malformed og:title (Closes #8417)

pull/2/head
Sergey M․ 9 years ago
parent 7a0e7779fe
commit 8870bb4653

@ -12,38 +12,52 @@ class WebOfStoriesIE(InfoExtractor):
_VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
_GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
_USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
_TESTS = [
{
'url': 'http://www.webofstories.com/play/hans.bethe/71',
'md5': '373e4dd915f60cfe3116322642ddf364',
'info_dict': {
'id': '4536',
'ext': 'mp4',
'title': 'The temperature of the sun',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Hans Bethe talks about calculating the temperature of the sun',
'duration': 238,
}
_TESTS = [{
'url': 'http://www.webofstories.com/play/hans.bethe/71',
'md5': '373e4dd915f60cfe3116322642ddf364',
'info_dict': {
'id': '4536',
'ext': 'mp4',
'title': 'The temperature of the sun',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Hans Bethe talks about calculating the temperature of the sun',
'duration': 238,
}
}, {
'url': 'http://www.webofstories.com/play/55908',
'md5': '2985a698e1fe3211022422c4b5ed962c',
'info_dict': {
'id': '55908',
'ext': 'mp4',
'title': 'The story of Gemmata obscuriglobus',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
'duration': 169,
},
'skip': 'notfound',
}, {
# malformed og:title meta
'url': 'http://www.webofstories.com/play/54215?o=MS',
'info_dict': {
'id': '54215',
'ext': 'mp4',
'title': '"A Leg to Stand On"',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Oliver Sacks talks about the death and resurrection of a limb',
'duration': 97,
},
{
'url': 'http://www.webofstories.com/play/55908',
'md5': '2985a698e1fe3211022422c4b5ed962c',
'info_dict': {
'id': '55908',
'ext': 'mp4',
'title': 'The story of Gemmata obscuriglobus',
'thumbnail': 're:^https?://.*\.jpg$',
'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
'duration': 169,
}
'params': {
'skip_download': True,
},
]
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = self._og_search_title(webpage)
# Sometimes og:title meta is malformed
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)

Loading…
Cancel
Save