mirror of https://github.com/yt-dlp/yt-dlp
Merge remote-tracking branch 'upstream' into biliSearchPageIE
commit
eac8a89b47
@ -1,63 +1,50 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
from .vidyard import VidyardBaseIE, VidyardIE
|
||||
from ..utils import ExtractorError, make_archive_id, url_basename
|
||||
|
||||
|
||||
class CellebriteIE(InfoExtractor):
|
||||
class CellebriteIE(VidyardBaseIE):
|
||||
_VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
|
||||
'info_dict': {
|
||||
'id': '16025876',
|
||||
'id': 'ZqmUss3dQfEMGpauambPuH',
|
||||
'display_id': '16025876',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:174571cb97083fd1d457d75c684f4e2b',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
|
||||
'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
|
||||
'duration': 455,
|
||||
'tags': [],
|
||||
'description': 'md5:dee48fe12bbae5c01fe6a053f7676da4',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
|
||||
'duration': 455.979,
|
||||
'_old_archive_ids': ['cellebrite 16025876'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
|
||||
'info_dict': {
|
||||
'id': '29018255',
|
||||
'id': 'QV1U8a2yzcxigw7VFnqKyg',
|
||||
'display_id': '29018255',
|
||||
'ext': 'mp4',
|
||||
'duration': 134,
|
||||
'tags': [],
|
||||
'description': 'md5:e9a3d124c7287b0b07bad2547061cacf',
|
||||
'title': 'How to Lawfully Collect the Maximum Amount of Data From Android Devices',
|
||||
'description': 'md5:0e943a9ac14c374d5d74faed634d773c',
|
||||
'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
|
||||
'title': 'Android Extractions Explained',
|
||||
'duration': 134.315,
|
||||
'_old_archive_ids': ['cellebrite 29018255'],
|
||||
},
|
||||
}]
|
||||
|
||||
def _get_formats_and_subtitles(self, json_data, display_id):
|
||||
formats = [{'url': url} for url in traverse_obj(json_data, ('mp4', ..., 'url')) or []]
|
||||
subtitles = {}
|
||||
|
||||
for url in traverse_obj(json_data, ('hls', ..., 'url')) or []:
|
||||
fmt, sub = self._extract_m3u8_formats_and_subtitles(
|
||||
url, display_id, ext='mp4', headers={'Referer': 'https://play.vidyard.com/'})
|
||||
formats.extend(fmt)
|
||||
self._merge_subtitles(sub, target=subtitles)
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
player_uuid = self._search_regex(
|
||||
r'<img\s[^>]*\bdata-uuid\s*=\s*"([^"\?]+)', webpage, 'player UUID')
|
||||
json_data = self._download_json(
|
||||
f'https://play.vidyard.com/player/{player_uuid}.json', display_id)['payload']['chapters'][0]
|
||||
slug = self._match_id(url)
|
||||
webpage = self._download_webpage(url, slug)
|
||||
vidyard_url = next(VidyardIE._extract_embed_urls(url, webpage), None)
|
||||
if not vidyard_url:
|
||||
raise ExtractorError('No Vidyard video embeds found on page')
|
||||
|
||||
video_id = url_basename(vidyard_url)
|
||||
info = self._process_video_json(self._fetch_video_json(video_id)['chapters'][0], video_id)
|
||||
if info.get('display_id'):
|
||||
info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
|
||||
if thumbnail := self._og_search_thumbnail(webpage, default=None):
|
||||
info.setdefault('thumbnails', []).append({'url': thumbnail})
|
||||
|
||||
formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], display_id)
|
||||
return {
|
||||
'id': str(json_data['videoId']),
|
||||
'title': json_data.get('name') or self._og_search_title(webpage),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'description': json_data.get('description') or self._og_search_description(webpage),
|
||||
'duration': json_data.get('seconds'),
|
||||
'tags': json_data.get('tags'),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'http_headers': {'Referer': 'https://play.vidyard.com/'},
|
||||
'description': self._og_search_description(webpage, default=None),
|
||||
**info,
|
||||
}
|
||||
|
@ -1,115 +0,0 @@
|
||||
import random
|
||||
import string
|
||||
import urllib.parse
|
||||
|
||||
from .discoverygo import DiscoveryGoBaseIE
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import ExtractorError
|
||||
|
||||
|
||||
class DiscoveryIE(DiscoveryGoBaseIE):
    """Extractor for `/tv-shows/<show>/videos|full-episodes/<slug>` pages.

    Matches go.discovery.com, several www.* network sites and several
    watch.* network sites (see `_VALID_URL`). Metadata and stream data are
    fetched from the API rooted at `_API_BASE_URL`.
    """
    _VALID_URL = r'''(?x)https?://
        (?P<site>
            go\.discovery|
            www\.
                (?:
                    investigationdiscovery|
                    discoverylife|
                    animalplanet|
                    ahctv|
                    destinationamerica|
                    sciencechannel|
                    tlc
                )|
            watch\.
                (?:
                    hgtv|
                    foodnetwork|
                    travelchannel|
                    diynetwork|
                    cookingchanneltv|
                    motortrend
                )
        )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
    _TESTS = [{
        'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
        'info_dict': {
            'id': '5a2f35ce6b66d17a5026e29e',
            'ext': 'mp4',
            'title': 'Riding with Matthew Perry',
            'description': 'md5:a34333153e79bc4526019a5129e7f878',
            'duration': 84,
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
        },
    }, {
        'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
        'only_matching': True,
    }, {
        'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
        'only_matching': True,
    }, {
        # using `show_slug` is important to get the correct video data
        'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
        'only_matching': True,
    }]
    _GEO_COUNTRIES = ['US']
    _GEO_BYPASS = False
    _API_BASE_URL = 'https://api.discovery.com/v1/'

    def _real_extract(self, url):
        """Resolve an access token, query the content/streaming API and build the info dict.

        The token is taken from the `eosAf` or `eosAn` cookie when available,
        otherwise an anonymous token is requested from `https://<site>.com/anonymous`.

        Raises:
            ExtractorError: when no video metadata is returned, when the API
                answers 401/403 with a description (geo restriction is raised
                through `raise_geo_restricted`), or for any other download error.
        """
        site, show_slug, display_id = self._match_valid_url(url).groups()

        access_token = None
        cookies = self._get_cookies(url)

        # prefer Affiliate Auth Token over Anonymous Auth Token
        auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
        if auth_storage_cookie and auth_storage_cookie.value:
            # The cookie value is percent-decoded twice before being parsed as JSON
            auth_storage = self._parse_json(urllib.parse.unquote(
                urllib.parse.unquote(auth_storage_cookie.value)),
                display_id, fatal=False)
            # Only a JSON object can carry the token; ignore any other payload
            if isinstance(auth_storage, dict):
                access_token = auth_storage.get('a') or auth_storage.get('access_token')

        if not access_token:
            access_token = self._download_json(
                f'https://{site}.com/anonymous', display_id,
                'Downloading token JSON metadata', query={
                    'authRel': 'authorization',
                    'client_id': '3020a40c2356a645b4b4',
                    'nonce': ''.join(random.choices(string.ascii_letters, k=32)),
                    'redirectUri': 'https://www.discovery.com/',
                })['access_token']

        headers = self.geo_verification_headers()
        headers['Authorization'] = 'Bearer ' + access_token

        try:
            videos = self._download_json(
                self._API_BASE_URL + 'content/videos',
                display_id, 'Downloading content JSON metadata',
                headers=headers, query={
                    'embed': 'show.name',
                    'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
                    'slug': display_id,
                    'show_slug': show_slug,
                })
            # An empty or non-list answer would otherwise surface as a bare IndexError/KeyError
            if not isinstance(videos, list) or not videos:
                raise ExtractorError('Unable to find video metadata')
            video = videos[0]
            video_id = video['id']
            stream = self._download_json(
                self._API_BASE_URL + 'streaming/video/' + video_id,
                display_id, 'Downloading streaming JSON metadata', headers=headers)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
                error_info = self._parse_json(
                    e.cause.response.read().decode('utf-8', 'replace'),
                    display_id, fatal=False)
                e_description = error_info.get('description') if isinstance(error_info, dict) else None
                if not isinstance(e_description, str):
                    # No usable description in the body: keep the original HTTP error
                    raise
                if 'resource not available for country' in e_description:
                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                if 'Authorized Networks' in e_description:
                    raise ExtractorError(
                        'This video is only available via cable service provider subscription that'
                        ' is not currently supported. You may want to use --cookies.', expected=True)
                raise ExtractorError(e_description)
            raise

        return self._extract_video_info(video, stream, display_id)
|
@ -1,171 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class DiscoveryGoBaseIE(InfoExtractor):
    """Shared URL template and info-dict construction for the Discovery GO extractors."""
    # `%s` is filled in by subclasses with the path prefix that precedes the id
    _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocitychannel
        )go\.com/%s(?P<id>[^/?#&]+)'''

    def _extract_video_info(self, video, stream, display_id):
        """Build an info dict from a video metadata dict and its stream dict.

        Args:
            video: metadata mapping; `name` is required, the other fields are optional.
            stream: mapping holding the stream URLs and optional `captions`.
            display_id: identifier used for logging and as the `id` fallback.

        Raises:
            ExtractorError: when `stream` is empty. The error is marked as
                expected if the video is flagged `authenticated`.
        """
        title = video['name']

        if not stream:
            if video.get('authenticated') is True:
                raise ExtractorError(
                    'This video is only available via cable service provider subscription that'
                    ' is not currently supported. You may want to use --cookies.', expected=True)
            raise ExtractorError('Unable to find stream')

        def sub_dict(key):
            # A key that is present with a null (or otherwise non-dict) value
            # must behave like a missing key instead of raising AttributeError
            value = video.get(key)
            return value if isinstance(value, dict) else {}

        formats = []
        hls_url = stream.get('streamUrl')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, display_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))
        # The previous code derived this key with 'streamUrl'.capitalize(), which
        # lowercases the tail and therefore looked up 'hdsStreamurl'. The camel-cased
        # key is presumed to be the intended one (by analogy with 'streamUrl');
        # the legacy spelling is kept as a fallback.
        hds_url = stream.get('hdsStreamUrl') or stream.get('hdsStreamurl')
        if hds_url:
            formats.extend(self._extract_f4m_formats(
                hds_url, display_id, f4m_id='hds', fatal=False))

        video_id = video.get('id') or display_id
        description = sub_dict('description').get('detailed')
        duration = int_or_none(video.get('duration'))

        series = sub_dict('show').get('name')
        season_number = int_or_none(sub_dict('season').get('number'))
        episode_number = int_or_none(video.get('episodeNumber'))

        tags = video.get('tags')
        age_limit = parse_age_limit(sub_dict('parental').get('rating'))

        subtitles = {}
        captions = stream.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                if not isinstance(caption, dict):
                    continue
                subtitle_url = url_or_none(caption.get('fileUrl'))
                if not subtitle_url or not subtitle_url.startswith('http'):
                    continue
                # Fall back to English when the language is missing or null
                lang = caption.get('fileLang') or 'en'
                ext = determine_ext(subtitle_url)
                subtitles.setdefault(lang, []).append({
                    'url': subtitle_url,
                    'ext': 'ttml' if ext == 'xml' else ext,
                })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'duration': duration,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'tags': tags,
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||
|
||||
|
||||
class DiscoveryGoIE(DiscoveryGoBaseIE):
    """Extractor for single-episode pages matched by the Discovery GO URL template."""
    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+'
    _GEO_COUNTRIES = ['US']
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/',
        'info_dict': {
            'id': '58c167d86b66d12f2addeb01',
            'ext': 'mp4',
            'title': 'Reaper Madness',
            'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78',
            'duration': 2519,
            'series': 'Bering Sea Gold',
            'season_number': 8,
            'episode_number': 6,
            'age_limit': 14,
        },
    }

    def _real_extract(self, url):
        """Read the embedded video JSON from the player container and build the info dict.

        Raises:
            ExtractorError: when the player container carries neither a
                `data-video` nor a `data-json` attribute.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        container = extract_attributes(
            self._search_regex(
                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
                webpage, 'video container'))

        video_json = container.get('data-video') or container.get('data-json')
        if not video_json:
            # Passing None on to the JSON parser would fail with an unrelated TypeError
            raise ExtractorError('Unable to find video data in the player container')

        video = self._parse_json(video_json, display_id)

        stream = video.get('stream')

        return self._extract_video_info(video, stream, display_id)
|
||||
|
||||
|
||||
class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE):
    """Extractor for show pages, returned as a playlist of episode URLs."""
    _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % ''
    _TEST = {
        'url': 'https://www.discoverygo.com/bering-sea-gold/',
        'info_dict': {
            'id': 'bering-sea-gold',
            'title': 'Bering Sea Gold',
            'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e',
        },
        'playlist_mincount': 6,
    }

    @classmethod
    def suitable(cls, url):
        """Decline any URL that the single-episode extractor already accepts."""
        if DiscoveryGoIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Collect every `data-json` blob of type `episode` that has a `socialUrl`."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        entries = []
        for json_match in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage):
            item = self._parse_json(
                json_match.group('json'), display_id,
                transform_source=unescapeHTML, fatal=False)
            is_episode = isinstance(item, dict) and item.get('type') == 'episode'
            social_url = item.get('socialUrl') if is_episode else None
            if social_url:
                entries.append(self.url_result(
                    social_url, ie=DiscoveryGoIE.ie_key(),
                    video_id=item.get('id')))

        playlist_title = remove_end(
            self._og_search_title(webpage, fatal=False), ' | Discovery GO')
        playlist_description = self._og_search_description(webpage)
        return self.playlist_result(
            entries, display_id, playlist_title, playlist_description)
|
@ -0,0 +1,78 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_by_class,
|
||||
get_element_html_by_id,
|
||||
join_nonempty,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LearningOnScreenIE(InfoExtractor):
    """Extractor for on-demand programme pages on learningonscreen.ac.uk."""
    _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
        'info_dict': {
            'id': '005D81B2',
            'ext': 'mp4',
            'title': 'Planet Earth',
            'duration': 3600.0,
            'timestamp': 1164567600.0,
            'upload_date': '20061126',
            'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
        },
    }]

    def _real_initialize(self):
        """Require the `PHPSESSID-BOB-LIVE` session cookie before any extraction."""
        session_cookie = self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE')
        if session_cookie:
            return
        self.raise_login_required(
            'Use --cookies for authentication. See '
            ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
            'for how to manually pass cookies', method=None)

    def _real_extract(self, url):
        """Scrape programme details and HTML5 media entries from the page.

        Returns a single video when one media entry is found, and a playlist
        with numbered ids and titles when there are several.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Scraping steps applied inside the 'programme-details' element
        programme_details_html = functools.partial(get_element_html_by_id, 'programme-details')
        find_heading = functools.partial(re.search, r'<h2>([^<]+)</h2>')
        broadcast_date = functools.partial(get_element_by_class, 'broadcast-date')
        leading_text = functools.partial(re.match, r'([^<]+)')
        running_time = functools.partial(get_element_by_class, 'prog-running-time')

        details = traverse_obj(webpage, (
            {programme_details_html}, {
                'title': ({find_heading}, 1, {clean_html}),
                'timestamp': ({broadcast_date}, {leading_text}, 1, {unified_timestamp}),
                'duration': ({running_time}, {clean_html}, {parse_duration}),
            }))

        # Fall back to the record title stored on the playlist widget
        title = details.pop('title', None) or traverse_obj(webpage, (
            {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')},
            {extract_attributes}, 'data-record-title', {clean_html}))

        entries = self._parse_html5_media_entries(
            'https://stream.learningonscreen.ac.uk', webpage, video_id, m3u8_id='hls', mpd_id='dash',
            _headers={'Origin': 'https://learningonscreen.ac.uk', 'Referer': 'https://learningonscreen.ac.uk/'})
        if not entries:
            raise ExtractorError('No video found')

        if len(entries) == 1:
            info = dict(entries[0])
            info.update(details)
            info['id'] = video_id
            info['title'] = title
            return info

        # The scraped duration is attached to the playlist, not to the parts
        duration = details.pop('duration', None)
        for part_number, part in enumerate(entries, start=1):
            part.update(details)
            part['id'] = join_nonempty(video_id, part_number)
            part['title'] = join_nonempty(title, part_number)
        return self.playlist_result(entries, video_id, title, duration=duration)
|
@ -0,0 +1,426 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class VidyardBaseIE(InfoExtractor):
    """Helpers shared by extractors that read Vidyard player JSON."""
    # Referer sent with HLS manifest requests and attached to every info dict
    _HEADERS = {'Referer': 'https://play.vidyard.com/'}

    def _get_formats_and_subtitles(self, sources, video_id):
        """Convert a player `sources` mapping into formats and HLS subtitles.

        Args:
            sources: mapping of source type to a list of source dicts. It is
                not modified by this method.
            video_id: identifier used for logging during manifest downloads.

        Returns:
            A `(formats, subtitles)` tuple.
        """
        formats, subtitles = [], {}

        def add_hls_fmts_and_subs(m3u8_url):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', headers=self._HEADERS, fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        hls_list = None
        if isinstance(sources, dict):
            # Work on a shallow copy so that removing 'hls' does not alter the caller's JSON
            sources = dict(sources)
            hls_list = sources.pop('hls', None)

        if master_m3u8_url := traverse_obj(
                hls_list, (lambda _, v: v['profile'] == 'auto', 'url', {url_or_none}, any)):
            add_hls_fmts_and_subs(master_m3u8_url)
        if not formats:  # These are duplicate and unnecessary requests if we got 'auto' hls fmts
            for variant_m3u8_url in traverse_obj(hls_list, (..., 'url', {url_or_none})):
                add_hls_fmts_and_subs(variant_m3u8_url)

        # Every remaining source type is added as direct HTTP formats
        for source_type, source_list in traverse_obj(sources, ({dict.items}, ...)):
            for source in traverse_obj(source_list, lambda _, v: url_or_none(v['url'])):
                profile = source.get('profile')
                formats.append({
                    'url': source['url'],
                    'ext': mimetype2ext(source.get('mimeType'), default=None),
                    'format_id': join_nonempty('http', source_type, profile),
                    **parse_resolution(profile),
                })

        self._remove_duplicate_formats(formats)
        return formats, subtitles

    def _get_direct_subtitles(self, caption_json):
        """Build a subtitles dict from caption entries that have a valid `vttUrl`.

        Entries without a `language` are filed under 'und'.
        """
        subs = {}
        for caption in traverse_obj(caption_json, lambda _, v: url_or_none(v['vttUrl'])):
            subs.setdefault(caption.get('language') or 'und', []).append({
                'url': caption['vttUrl'],
                'name': caption.get('name'),
            })

        return subs

    def _fetch_video_json(self, video_id):
        """Download the player JSON for `video_id` and return its `payload` member."""
        return self._download_json(
            f'https://play.vidyard.com/player/{video_id}.json', video_id)['payload']

    def _process_video_json(self, json_data, video_id):
        """Turn one chapter dict of the player payload into an info dict.

        `json_data['sources']` is required; captions and the metadata fields
        are optional and silently omitted when absent or of the wrong type.
        """
        formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], video_id)
        self._merge_subtitles(self._get_direct_subtitles(json_data.get('captions')), target=subtitles)

        return {
            **traverse_obj(json_data, {
                'id': ('facadeUuid', {str}),
                'display_id': ('videoId', {int}, {str_or_none}),
                'title': ('name', {str}),
                'description': ('description', {str}, {unescapeHTML}, {lambda x: x or None}),
                # Prefer the millisecond value; fall back to whole seconds
                'duration': ((
                    ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
                    ('seconds', {int_or_none})), any),
                'thumbnails': ('thumbnailUrls', ('small', 'normal'), {'url': {url_or_none}}),
                'tags': ('tags', ..., 'name', {str}),
            }),
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': self._HEADERS,
        }
|
||||
|
||||
|
||||
class VidyardIE(VidyardBaseIE):
|
||||
_VALID_URL = [
|
||||
r'https?://[\w-]+(?:\.hubs)?\.vidyard\.com/watch/(?P<id>[\w-]+)',
|
||||
r'https?://(?:embed|share)\.vidyard\.com/share/(?P<id>[\w-]+)',
|
||||
r'https?://play\.vidyard\.com/(?:player/)?(?P<id>[\w-]+)',
|
||||
]
|
||||
_EMBED_REGEX = [r'<iframe[^>]* src=["\'](?P<url>(?:https?:)?//play\.vidyard\.com/[\w-]+)']
|
||||
_TESTS = [{
|
||||
'url': 'https://vyexample03.hubs.vidyard.com/watch/oTDMPlUv--51Th455G5u7Q',
|
||||
'info_dict': {
|
||||
'id': 'oTDMPlUv--51Th455G5u7Q',
|
||||
'display_id': '50347',
|
||||
'ext': 'mp4',
|
||||
'title': 'Homepage Video',
|
||||
'description': 'Look I changed the description.',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
|
||||
'duration': 99,
|
||||
'tags': ['these', 'are', 'all', 'tags'],
|
||||
},
|
||||
}, {
|
||||
'url': 'https://share.vidyard.com/watch/PaQzDAT1h8JqB8ivEu2j6Y?',
|
||||
'info_dict': {
|
||||
'id': 'PaQzDAT1h8JqB8ivEu2j6Y',
|
||||
'display_id': '9281024',
|
||||
'ext': 'mp4',
|
||||
'title': 'Inline Embed',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
|
||||
'duration': 41.186,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://embed.vidyard.com/share/oTDMPlUv--51Th455G5u7Q',
|
||||
'info_dict': {
|
||||
'id': 'oTDMPlUv--51Th455G5u7Q',
|
||||
'display_id': '50347',
|
||||
'ext': 'mp4',
|
||||
'title': 'Homepage Video',
|
||||
'description': 'Look I changed the description.',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
|
||||
'duration': 99,
|
||||
'tags': ['these', 'are', 'all', 'tags'],
|
||||
},
|
||||
}, {
|
||||
# First video from playlist below
|
||||
'url': 'https://embed.vidyard.com/share/SyStyHtYujcBHe5PkZc5DL',
|
||||
'info_dict': {
|
||||
'id': 'SyStyHtYujcBHe5PkZc5DL',
|
||||
'display_id': '41974005',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'description': r're:In this video, you will learn how to prepare the frame.+',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
|
||||
'duration': 258.666,
|
||||
},
|
||||
}, {
|
||||
# Playlist
|
||||
'url': 'https://thelink.hubs.vidyard.com/watch/pwu7pCYWSwAnPxs8nDoFrE',
|
||||
'info_dict': {
|
||||
'id': 'pwu7pCYWSwAnPxs8nDoFrE',
|
||||
'title': 'PLAYLIST - Palm Beach Shutters- Bi-Fold Track System Installation',
|
||||
'entries': [{
|
||||
'id': 'SyStyHtYujcBHe5PkZc5DL',
|
||||
'display_id': '41974005',
|
||||
'ext': 'mp4',
|
||||
'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
|
||||
'duration': 258.666,
|
||||
}, {
|
||||
'id': '1Fw4B84jZTXLXWqkE71RiM',
|
||||
'display_id': '5861113',
|
||||
'ext': 'mp4',
|
||||
'title': 'Palm Beach - Bi-Fold Track System "Frame Installation"',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861113/29CJ54s5g1_aP38zkKLHew_small.jpg',
|
||||
'duration': 167.858,
|
||||
}, {
|
||||
'id': 'DqP3wBvLXSpxrcqpT5kEeo',
|
||||
'display_id': '41976334',
|
||||
'ext': 'mp4',
|
||||
'title': 'Install the Track for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861090/RwG2VaTylUa6KhSTED1r1Q_small.png',
|
||||
'duration': 94.229,
|
||||
}, {
|
||||
'id': 'opfybfxpzQArxqtQYB6oBU',
|
||||
'display_id': '41976364',
|
||||
'ext': 'mp4',
|
||||
'title': 'Install the Panel for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860926/JIOaJR08dM4QgXi_iQ2zGA_small.png',
|
||||
'duration': 191.467,
|
||||
}, {
|
||||
'id': 'rWrXvkbTNNaNqD6189HJya',
|
||||
'display_id': '41976382',
|
||||
'ext': 'mp4',
|
||||
'title': 'Adjust the Panels for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860687/CwHxBv4UudAhOh43FVB4tw_small.png',
|
||||
'duration': 138.155,
|
||||
}, {
|
||||
'id': 'eYPTB521MZ9TPEArSethQ5',
|
||||
'display_id': '41976409',
|
||||
'ext': 'mp4',
|
||||
'title': 'Assemble and Install the Valance for Palm Beach Polysatin Shutters With BiFold Track',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861425/0y68qlMU4O5VKU7bJ8i_AA_small.png',
|
||||
'duration': 148.224,
|
||||
}],
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
# Non hubs.vidyard.com playlist
|
||||
'url': 'https://salesforce.vidyard.com/watch/d4vqPjs7Q5EzVEis5QT3jd',
|
||||
'info_dict': {
|
||||
'id': 'd4vqPjs7Q5EzVEis5QT3jd',
|
||||
'title': 'How To: Service Cloud: Import External Content in Lightning Knowledge',
|
||||
'entries': [{
|
||||
'id': 'mcjDpSZir2iSttbvFkx6Rv',
|
||||
'display_id': '29479036',
|
||||
'ext': 'mp4',
|
||||
'title': 'Welcome to this Expert Coaching Series',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/ouyQi9WuwyiOupChUWNmjQ/7170d3485ba602e012df05_small.jpg',
|
||||
'duration': 38.205,
|
||||
}, {
|
||||
'id': '84bPYwpg243G6xYEfJdYw9',
|
||||
'display_id': '21820704',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 1 - Title + Agenda',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/HFPN0ZgQq4Ow8BghGcQSow/bfaa30123c8f6601e7d7f2_small.jpg',
|
||||
'duration': 98.016,
|
||||
}, {
|
||||
'id': 'nP17fMuvA66buVHUrzqjTi',
|
||||
'display_id': '21820707',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 2 - Import Options',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/rGRIF5nFjPI9OOA2qJ_Dbg/86a8d02bfec9a566845dd4_small.jpg',
|
||||
'duration': 199.136,
|
||||
}, {
|
||||
'id': 'm54EcwXdpA5gDBH5rgCYoV',
|
||||
'display_id': '21820710',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 3 - Importing Article Translations',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/IVX4XR8zpSsiNIHx45kz-A/1ccbf8a29a33856d06b3ed_small.jpg',
|
||||
'duration': 184.352,
|
||||
}, {
|
||||
'id': 'j4nzS42oq4hE9oRV73w3eQ',
|
||||
'display_id': '21820716',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 4 - Best Practices',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/BtrRrQpRDLbA4AT95YQyog/1f1e6b8e7fdc3fa95ec8d3_small.jpg',
|
||||
'duration': 296.960,
|
||||
}, {
|
||||
'id': 'y28PYfW5pftvers9PXzisC',
|
||||
'display_id': '21820727',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 5 - Migration Steps',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/K2CdQOXDfLcrVTF60r0bdw/a09239ada28b6ffce12b1f_small.jpg',
|
||||
'duration': 620.640,
|
||||
}, {
|
||||
'id': 'YWU1eQxYvhj29SjYoPw5jH',
|
||||
'display_id': '21820733',
|
||||
'ext': 'mp4',
|
||||
'title': 'Chapter 6 - Demo',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/rsmhP-cO8dAa8ilvFGCX0g/7911ef415167cd14032068_small.jpg',
|
||||
'duration': 631.456,
|
||||
}, {
|
||||
'id': 'nmEvVqpwdJUgb74zKsLGxn',
|
||||
'display_id': '29479037',
|
||||
'ext': 'mp4',
|
||||
'title': 'Schedule Your Follow-Up',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/Rtwc7X4PEkF4Ae5kHi-Jvw/174ebed3f34227b1ffa1d0_small.jpg',
|
||||
'duration': 33.608,
|
||||
}],
|
||||
},
|
||||
'playlist_count': 8,
|
||||
}, {
|
||||
# URL of iframe embed src
|
||||
'url': 'https://play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.html',
|
||||
'info_dict': {
|
||||
'id': 'iDqTwWGrd36vaLuaCY3nTs',
|
||||
'display_id': '9281009',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lightbox Embed',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
|
||||
'duration': 39.035,
|
||||
},
|
||||
}, {
|
||||
# Player JSON URL
|
||||
'url': 'https://play.vidyard.com/player/7GAApnNNbcZZ46k6JqJQSh.json?disable_analytics=0',
|
||||
'info_dict': {
|
||||
'id': '7GAApnNNbcZZ46k6JqJQSh',
|
||||
'display_id': '820026',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Art of Storytelling: How to Deliver Your Brand Story with Content & Social',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/MhbE-5sEFQu4x3fI6FkNlA/41eb5717c557cd19456910_small.jpg',
|
||||
'duration': 2153.013,
|
||||
'tags': ['Summit2017'],
|
||||
},
|
||||
}, {
|
||||
'url': 'http://share.vidyard.com/share/diYeo6YR2yiGgL8odvS8Ri',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.vidyard.com/FFlz3ZpxhIfKQ1fd9DAryA',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.vidyard.com/qhMAu5A76GZVrFzOPgSf9A/type/standalone',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_WEBPAGE_TESTS = [{
|
||||
# URL containing inline/lightbox embedded video
|
||||
'url': 'https://resources.altium.com/p/2-the-extreme-importance-of-pc-board-stack-up',
|
||||
'info_dict': {
|
||||
'id': 'GDx1oXrFWj4XHbipfoXaMn',
|
||||
'display_id': '3225198',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Extreme Importance of PC Board Stack Up',
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/73_Q3_hBexWX7Og1sae6cg/9998fa4faec921439e2c04_small.jpg',
|
||||
'duration': 3422.742,
|
||||
},
|
||||
}, {
|
||||
# <script ... id="vidyard_embed_code_DXx2sW4WaLA6hTdGFz7ja8" src="//play.vidyard.com/DXx2sW4WaLA6hTdGFz7ja8.js?
|
||||
'url': 'http://videos.vivint.com/watch/DXx2sW4WaLA6hTdGFz7ja8',
|
||||
'info_dict': {
|
||||
'id': 'DXx2sW4WaLA6hTdGFz7ja8',
|
||||
'display_id': '2746529',
|
||||
'ext': 'mp4',
|
||||
'title': 'How To Powercycle the Smart Hub Panel',
|
||||
'duration': 30.613,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/_-6cw8xQUJ3qiCs_JENc_A/b21d7a5e47967f49399d30_small.jpg',
|
||||
},
|
||||
}, {
|
||||
# <script id="vidyard_embed_code_MIBHhiLVTxga7wqLsuoDjQ" src="//embed.vidyard.com/embed/MIBHhiLVTxga7wqLsuoDjQ/inline?v=2.1">
|
||||
'url': 'https://www.babypips.com/learn/forex/introduction-to-metatrader4',
|
||||
'info_dict': {
|
||||
'id': 'MIBHhiLVTxga7wqLsuoDjQ',
|
||||
'display_id': '20291',
|
||||
'ext': 'mp4',
|
||||
'title': 'Lesson 1 - Opening an MT4 Account',
|
||||
'description': 'Never heard of MetaTrader4? Here\'s the 411 on the popular trading platform!',
|
||||
'duration': 168,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/20291/IM-G2WXQR9VBLl2Cmzvftg_small.jpg',
|
||||
},
|
||||
}, {
|
||||
# <iframe ... src="//play.vidyard.com/d61w8EQoZv1LDuPxDkQP2Q/type/background?preview=1"
|
||||
'url': 'https://www.avaya.com/en/',
|
||||
'info_dict': {
|
||||
# These values come from the generic extractor and don't matter
|
||||
'id': str,
|
||||
'title': str,
|
||||
'age_limit': 0,
|
||||
'upload_date': str,
|
||||
'description': str,
|
||||
'thumbnail': str,
|
||||
'timestamp': float,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'd61w8EQoZv1LDuPxDkQP2Q',
|
||||
'display_id': '42456529',
|
||||
'ext': 'mp4',
|
||||
'title': 'GettyImages-1027',
|
||||
'duration': 6.0,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/42061563/p6bY08d2N4e4IDz-7J4_wkgsPq3-qgcx_small.jpg',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'VAsYDi7eiqZRbHodUA2meC',
|
||||
'display_id': '42456569',
|
||||
'ext': 'mp4',
|
||||
'title': 'GettyImages-1325598833',
|
||||
'duration': 6.083,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/42052358/y3qrbDpn_2quWr_5XBi7yzS3UvEI__ZM_small.jpg',
|
||||
},
|
||||
}],
|
||||
'playlist_count': 2,
|
||||
}, {
|
||||
# <div class="vidyard-player-embed" data-uuid="vpCWTVHw3qrciLtVY94YkS"
|
||||
'url': 'https://www.gogoair.com/',
|
||||
'info_dict': {
|
||||
# These values come from the generic extractor and don't matter
|
||||
'id': str,
|
||||
'title': str,
|
||||
'description': str,
|
||||
'age_limit': 0,
|
||||
},
|
||||
'playlist': [{
|
||||
'info_dict': {
|
||||
'id': 'vpCWTVHw3qrciLtVY94YkS',
|
||||
'display_id': '40780699',
|
||||
'ext': 'mp4',
|
||||
'title': 'Upgrade to AVANCE 100% worth it - Jason Talley, Owner and Pilot, Testimonial',
|
||||
'description': 'md5:f609824839439a51990cef55ffc472aa',
|
||||
'duration': 70.737,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/40780699/KzjfYZz5MZl2gHF_e-4i2c6ib1cLDweQ_small.jpg',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'xAmV9AsLbnitCw35paLBD8',
|
||||
'display_id': '31130867',
|
||||
'ext': 'mp4',
|
||||
'title': 'Brad Keselowski goes faster with Gogo AVANCE inflight Wi-Fi',
|
||||
'duration': 132.565,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/31130867/HknyDtLdm2Eih9JZ4A5XLjhfBX_6HRw5_small.jpg',
|
||||
},
|
||||
}, {
|
||||
'info_dict': {
|
||||
'id': 'RkkrFRNxfP79nwCQavecpF',
|
||||
'display_id': '39009815',
|
||||
'ext': 'mp4',
|
||||
'title': 'Live Demo of Gogo Galileo',
|
||||
'description': 'md5:e2df497236f4e12c3fef8b392b5f23e0',
|
||||
'duration': 112.128,
|
||||
'thumbnail': 'https://cdn.vidyard.com/thumbnails/38144873/CWLlxfUbJ4Gh0ThbUum89IsEM4yupzMb_small.jpg',
|
||||
},
|
||||
}],
|
||||
'playlist_count': 3,
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    # Embed URLs found by the parent implementation; scheme-relative ones
    # ("//host/...") are given an explicit https scheme
    for candidate in super()._extract_embed_urls(url, webpage):
        yield f'https:{candidate}' if candidate.startswith('//') else candidate

    # Inline/lightbox embeds: <img>/<div> tags carrying the
    # "vidyard-player-embed" class, identified by their data-uuid attribute
    player_tag_re = r'(<(?:img|div)[^>]* class=(["\'])(?:[^>"\']* )?vidyard-player-embed(?: [^>"\']*)?\2[^>]+>)'
    for tag_match in re.finditer(player_tag_re, webpage):
        uuid = extract_attributes(tag_match.group(1)).get('data-uuid')
        if uuid:
            yield f'https://play.vidyard.com/{uuid}'

    # <script id="vidyard_embed_code_<id>"> embeds
    for script_match in re.finditer(r'<script[^>]* id=["\']vidyard_embed_code_([\w-]+)["\']', webpage):
        yield f'https://play.vidyard.com/{script_match.group(1)}'
|
||||
|
||||
def _real_extract(self, url):
    # Fetch the player JSON for the ID matched from the URL
    video_id = self._match_id(url)
    player_json = self._fetch_video_json(video_id)
    chapters = player_json['chapters']

    # A single chapter is returned as a plain video rather than a playlist
    if len(chapters) == 1:
        return self._process_video_json(chapters[0], video_id)

    entries = [self._process_video_json(chapter, video_id) for chapter in chapters]
    return self.playlist_result(entries, str(player_json['playerUuid']), player_json.get('name'))
|
@ -0,0 +1,108 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes, get_element_html_by_class, remove_start
|
||||
|
||||
|
||||
class VTVGoIE(InfoExtractor):
    # Two URL shapes are accepted: article-style pages under /kho-video/ or
    # /tin-tuc/ whose path ends in the numeric ID, and /digital/detail.php
    # pages that carry the ID in the content_id query parameter
    _VALID_URL = [
        r'https?://(?:www\.)?vtvgo\.vn/(kho-video|tin-tuc)/[\w.-]*?(?P<id>\d+)(?:\.[a-z]+|/)?(?:$|[?#])',
        r'https?://(?:www\.)?vtvgo\.vn/digital/detail\.php\?(?:[^#]+&)?content_id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'https://vtvgo.vn/kho-video/bep-vtv-vit-chao-rieng-so-24-888456.html',
        'info_dict': {
            'id': '888456',
            'ext': 'mp4',
            'title': 'Bếp VTV | Vịt chao riềng | Số 24',
            'description': 'md5:2b4e93ec2b954304170d32be288ce2c8',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20230201/VIT-CHAO-RIENG_VTV_638108894672812459.jpg',
        },
    }, {
        'url': 'https://vtvgo.vn/tin-tuc/hot-search-1-zlife-khong-ngo-toi-phai-khong-862074',
        'info_dict': {
            'id': '862074',
            'ext': 'mp4',
            'title': 'Hot Search #1 | Zlife | Không ngờ tới phải không? ',
            'description': 'md5:e967d0e2efbbebbee8814a55799b4d0f',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20220504/6b9a8552-e71c-46ce-bc9d-50c9bb506f9c.jpeg',
        },
    }, {
        # No slug before the numeric ID
        'url': 'https://vtvgo.vn/kho-video/918311.html',
        'info_dict': {
            'id': '918311',
            'title': 'Cà phê sáng | 05/02/2024 | Tái hiện hình ảnh Hà Nội xưa tại ngôi nhà di sản',
            'ext': 'mp4',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240205/0506_ca_phe_sang_638427226021318322.jpg',
            'description': 'md5:b121c67948f1ce58e6a036042fc14c1b',
        },
    }, {
        # content_id preceded by another query parameter
        'url': 'https://vtvgo.vn/digital/detail.php?digital_id=168&content_id=918634',
        'info_dict': {
            'id': '918634',
            'ext': 'mp4',
            'title': 'Gặp nhau cuối năm | Táo quân 2024',
            'description': 'md5:a1c221e78e5954d29d49b2a11c20513c',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240210/d0f73369-8f03-4108-9edd-83d4bc3997b2.png',
        },
    }, {
        'url': 'https://vtvgo.vn/digital/detail.php?content_id=919358',
        'info_dict': {
            'id': '919358',
            'ext': 'mp4',
            'title': 'Chúng ta của 8 năm sau | Tập 45 | Dương có bằng chứng, nhân chứng vạch mặt ông Khiêm',
            'description': 'md5:16ff5208cac6585137f554472a4677f3',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240221/550deff9-7736-4a0e-8b5d-33274d97cd7d.jpg',
        },
    }, {
        # No file extension after the ID
        'url': 'https://vtvgo.vn/kho-video/888456',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The HLS manifest URL appears in the page either as `var link = "..."`
        # or directly after `addPlayer(`
        manifest_url = self._search_regex(
            r'(?:var\s+link\s*=\s*|addPlayer\()["\'](https://[^"\']+/index\.m3u8)["\']',
            webpage, 'm3u8 url')

        # Metadata comes from the page's OpenGraph tags; each field is optional
        info = {'id': video_id}
        info['title'] = self._og_search_title(webpage, default=None)
        info['description'] = self._og_search_description(webpage, default=None)
        info['thumbnail'] = self._og_search_thumbnail(webpage, default=None)
        info['formats'] = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')
        return info
|
||||
|
||||
|
||||
class VTVIE(InfoExtractor):
    # Video pages under vtv.vn/video/ whose path ends in "<numeric id>.htm"
    _VALID_URL = r'https?://(?:www\.)?vtv\.vn/video/[\w-]*?(?P<id>\d+)\.htm'
    _TESTS = [{
        'url': 'https://vtv.vn/video/thoi-su-20h-vtv1-12-6-2024-680411.htm',
        'info_dict': {
            'id': '680411',
            'ext': 'mp4',
            'title': 'Thời sự 20h VTV1 - 12/6/2024 - Video đã phát trên VTV1 | VTV.VN',
            'thumbnail': 'https://cdn-images.vtv.vn/zoom/600_315/66349b6076cb4dee98746cf1/2024/06/12/thumb/1206-ts-20h-02929741475480320806760.mp4/thumb0.jpg',
        },
    }, {
        'url': 'https://vtv.vn/video/zlife-1-khong-ngo-toi-phai-khong-vtv24-560248.htm',
        'info_dict': {
            'id': '560248',
            'ext': 'mp4',
            'title': 'ZLife #1: Không ngờ tới phải không? | VTV24 - Video đã phát trên VTV-NEWS | VTV.VN',
            'description': 'Ai đứng sau vụ việc thay đổi ảnh đại diện trên các trang mạng xã hội của VTV Digital tối 2/5?',
            'thumbnail': 'https://video-thumbs.mediacdn.vn/zoom/600_315/vtv/2022/5/13/t67s6btf3ji-16524555726231894427334.jpg',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Locate the player element nested inside the highlight box and read
        # its data-vid attribute
        highlight_box = get_element_html_by_class('video-highlight-box', webpage)
        player_html = get_element_html_by_class('VCSortableInPreviewMode', highlight_box)
        data_vid = extract_attributes(player_html)['data-vid']

        # Strip the "vtv.mediacdn.vn/" prefix (when present) and build the
        # master manifest URL on cdn-videos.vtv.vn from the remainder
        media_path = remove_start(data_vid, 'vtv.mediacdn.vn/')
        manifest_url = f'https://cdn-videos.vtv.vn/{media_path}/master.m3u8'

        # Metadata comes from the page's OpenGraph tags; each field is optional
        info = {'id': video_id}
        info['title'] = self._og_search_title(webpage, default=None)
        info['description'] = self._og_search_description(webpage, default=None)
        info['thumbnail'] = self._og_search_thumbnail(webpage, default=None)
        info['formats'] = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')
        return info
|
Loading…
Reference in New Issue