Add the 'webpage_url' field to info_dict

The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
pull/1710/merge
Jaime Marquínez Ferrándiz 11 years ago
parent b6c45014ae
commit 9103bbc5cd

@ -148,6 +148,9 @@ def generator(test_case):
# Check for the presence of mandatory fields # Check for the presence of mandatory fields
for key in ('id', 'url', 'title', 'ext'): for key in ('id', 'url', 'title', 'ext'):
self.assertTrue(key in info_dict.keys() and info_dict[key]) self.assertTrue(key in info_dict.keys() and info_dict[key])
# Check for mandatory fields that are automatically set by YoutubeDL
for key in ['webpage_url', 'extractor']:
self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
finally: finally:
try_rm_tcs_files() try_rm_tcs_files()

@ -354,8 +354,11 @@ class YoutubeDL(object):
'_type': 'compat_list', '_type': 'compat_list',
'entries': ie_result, 'entries': ie_result,
} }
if 'extractor' not in ie_result: self.add_extra_info(ie_result,
ie_result['extractor'] = ie.IE_NAME {
'extractor': ie.IE_NAME,
'webpage_url': url
})
return self.process_ie_result(ie_result, download, extra_info) return self.process_ie_result(ie_result, download, extra_info)
except ExtractorError as de: # An error we somewhat expected except ExtractorError as de: # An error we somewhat expected
self.report_error(compat_str(de), de.format_traceback()) self.report_error(compat_str(de), de.format_traceback())
@ -417,6 +420,7 @@ class YoutubeDL(object):
'playlist': playlist, 'playlist': playlist,
'playlist_index': i + playliststart, 'playlist_index': i + playliststart,
'extractor': ie_result['extractor'], 'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
} }
entry_result = self.process_ie_result(entry, entry_result = self.process_ie_result(entry,
download=download, download=download,
@ -427,7 +431,10 @@ class YoutubeDL(object):
elif result_type == 'compat_list': elif result_type == 'compat_list':
def _fixup(r): def _fixup(r):
self.add_extra_info(r, self.add_extra_info(r,
{'extractor': ie_result['extractor']}) {
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
})
return r return r
ie_result['entries'] = [ ie_result['entries'] = [
self.process_ie_result(_fixup(r), download, extra_info) self.process_ie_result(_fixup(r), download, extra_info)

@ -71,6 +71,9 @@ class InfoExtractor(object):
("3D" or "DASH video") ("3D" or "DASH video")
* width Width of the video, if known * width Width of the video, if known
* height Height of the video, if known * height Height of the video, if known
webpage_url: The url to the video webpage, if given to youtube-dl it
should allow to get the same result again. (It will be set
by YoutubeDL if it's missing)
Unless mentioned otherwise, the fields should be Unicode strings. Unless mentioned otherwise, the fields should be Unicode strings.

@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com.""" """Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs # _VALID_URL matches Vimeo URLs
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$' _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
_NETRC_MACHINE = 'vimeo' _NETRC_MACHINE = 'vimeo'
IE_NAME = u'vimeo' IE_NAME = u'vimeo'
_TESTS = [ _TESTS = [
@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url) raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id') video_id = mobj.group('id')
if not mobj.group('proto'): if mobj.group('pro') or mobj.group('player'):
url = 'https://' + url
elif mobj.group('pro'):
url = 'http://player.vimeo.com/video/' + video_id url = 'http://player.vimeo.com/video/' + video_id
elif mobj.group('direct_link'): else:
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):
if len(formats) == 0: if len(formats) == 0:
raise ExtractorError(u'No known codec found') raise ExtractorError(u'No known codec found')
return [{ return {
'id': video_id, 'id': video_id,
'uploader': video_uploader, 'uploader': video_uploader,
'uploader_id': video_uploader_id, 'uploader_id': video_uploader_id,
@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):
'thumbnail': video_thumbnail, 'thumbnail': video_thumbnail,
'description': video_description, 'description': video_description,
'formats': formats, 'formats': formats,
}] 'webpage_url': url,
}
class VimeoChannelIE(InfoExtractor): class VimeoChannelIE(InfoExtractor):

@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
'subtitles': video_subtitles, 'subtitles': video_subtitles,
'duration': video_duration, 'duration': video_duration,
'age_limit': 18 if age_gate else 0, 'age_limit': 18 if age_gate else 0,
'annotations': video_annotations 'annotations': video_annotations,
'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
}) })
return results return results

Loading…
Cancel
Save