mirror of https://github.com/yt-dlp/yt-dlp
Merge branch 'yt-dlp:master' into pr/live-sections
commit
66a6e0a686
@ -1,16 +1,22 @@
|
||||
# Signature predicates keyed by image type. Each callable receives the leading
# bytes of a file (`h`) and returns whether they match that format.
tests = {
    # RIFF container whose form type (bytes 8-11) is WEBP. The slice is bounded
    # so that headers longer than 12 bytes are still recognised.
    'webp': lambda h: h[0:4] == b'RIFF' and h[8:12] == b'WEBP',
    'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
    'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
    'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
}
|
||||
|
||||
|
||||
def what(file=None, h=None):
    """Detect format of image (Currently supports jpeg, png, webp, gif only)

    Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
    Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf

    @param file  Path of the image to inspect; only read when `h` is None
    @param h     Leading bytes of the image, if the caller already has them
    @returns     'webp', 'png', 'jpeg', 'gif', or None when nothing matches
    """
    if h is None:
        with open(file, 'rb') as f:
            # 12 bytes cover the longest signature checked below (RIFF....WEBP)
            h = f.read(12)

    # RIFF container with the WEBP form type at offset 8
    if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
        return 'webp'

    if h.startswith(b'\x89PNG'):
        return 'png'

    # Match on the leading marker bytes rather than a JFIF/Exif tag at offset 6,
    # so JPEG data without either tag is still detected
    if h.startswith(b'\xFF\xD8\xFF'):
        return 'jpeg'

    if h.startswith(b'GIF'):
        return 'gif'

    return None
|
||||
|
@ -1,63 +1,50 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj
|
||||
from .vidyard import VidyardBaseIE, VidyardIE
|
||||
from ..utils import ExtractorError, make_archive_id, url_basename
|
||||
|
||||
|
||||
class CellebriteIE(VidyardBaseIE):
    """Extractor for cellebrite.com pages that embed a Vidyard player."""
    _VALID_URL = r'https?://cellebrite\.com/(?:\w+)?/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://cellebrite.com/en/collect-data-from-android-devices-with-cellebrite-ufed/',
        'info_dict': {
            'id': 'ZqmUss3dQfEMGpauambPuH',
            'display_id': '16025876',
            'ext': 'mp4',
            'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED',
            'description': 'md5:dee48fe12bbae5c01fe6a053f7676da4',
            'thumbnail': 'https://cellebrite.com/wp-content/uploads/2021/05/Chat-Capture-1024x559.png',
            'duration': 455.979,
            '_old_archive_ids': ['cellebrite 16025876'],
        },
    }, {
        'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/',
        'info_dict': {
            'id': 'QV1U8a2yzcxigw7VFnqKyg',
            'display_id': '29018255',
            'ext': 'mp4',
            'title': 'How to Lawfully Collect the Maximum Amount of Data From Android Devices',
            'description': 'md5:0e943a9ac14c374d5d74faed634d773c',
            'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png',
            'duration': 134.315,
            '_old_archive_ids': ['cellebrite 29018255'],
        },
    }]

    def _real_extract(self, url):
        """Locate the Vidyard embed on the page and build the info dict from its player JSON.

        Raises ExtractorError when the page contains no Vidyard embed.
        """
        slug = self._match_id(url)
        webpage = self._download_webpage(url, slug)
        vidyard_url = next(VidyardIE._extract_embed_urls(url, webpage), None)
        if not vidyard_url:
            raise ExtractorError('No Vidyard video embeds found on page')

        video_id = url_basename(vidyard_url)
        # Only the first chapter of the player payload is used
        info = self._process_video_json(self._fetch_video_json(video_id)['chapters'][0], video_id)
        if info.get('display_id'):
            # Record an archive ID built from `display_id` (e.g. 'cellebrite 16025876' in the tests)
            info['_old_archive_ids'] = [make_archive_id(self, info['display_id'])]
        if thumbnail := self._og_search_thumbnail(webpage, default=None):
            info.setdefault('thumbnails', []).append({'url': thumbnail})

        return {
            # The page's og:description is only a fallback; keys in `info` take precedence
            'description': self._og_search_description(webpage, default=None),
            **info,
        }
|
||||
|
@ -0,0 +1,32 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import update_url, url_or_none
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GraspopIE(InfoExtractor):
    """Extractor for video pages on vod.graspop.be."""
    _VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)/'
    _TESTS = [{
        'url': 'https://vod.graspop.be/fr/101556/thy-art-is-murder-concert/',
        'info_dict': {
            'id': '101556',
            'ext': 'mp4',
            'title': 'Thy Art Is Murder',
            'thumbnail': r're:https://cdn-mds\.pickx\.be/festivals/v3/global/original/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Fetch the stream metadata for the video ID and return its HLS formats."""
        video_id = self._match_id(url)
        stream_info = self._download_json(
            f'https://tv.proximus.be/MWC/videocenter/festivals/{video_id}/stream', video_id)

        # Downgrade manifest request to avoid incomplete certificate chain error
        manifest_url = update_url(stream_info['source']['assetUri'], scheme='http')
        formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')

        optional_fields = traverse_obj(stream_info, {
            'title': ('name', {str}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **optional_fields,
        }
|
@ -0,0 +1,114 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_html_by_id,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
str_or_none,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class LaracastsBaseIE(InfoExtractor):
    """Shared helpers for the Laracasts extractors."""

    def _get_prop_data(self, url, display_id):
        """Download the page and return the `props` object from the `data-page` JSON on the `#app` element."""
        webpage = self._download_webpage(url, display_id)
        app_element = get_element_html_by_id('app', webpage)
        return traverse_obj(
            app_element, ({extract_attributes}, 'data-page', {json.loads}, 'props'))

    def _parse_episode(self, episode):
        """Turn an episode object into a url_transparent result pointing at the Vimeo player.

        Calls `raise_login_required` when the episode has no `vimeoId`.
        """
        has_vimeo_id = traverse_obj(episode, 'vimeoId')
        if not has_vimeo_id:
            self.raise_login_required('This video is only available for subscribers.')

        player_url = VimeoIE._smuggle_referrer(
            f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/')
        episode_fields = traverse_obj(episode, {
            'id': ('id', {int}, {str_or_none}),
            'webpage_url': ('path', {lambda path: urljoin('https://laracasts.com', path)}),
            'title': ('title', {clean_html}),
            'season_number': ('chapter', {int_or_none}),
            'episode_number': ('position', {int_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': ('largeThumbnail', {url_or_none}),
            'duration': ('length', {int_or_none}),
            'date': ('dateSegments', 'published', {unified_strdate}),
        })
        return self.url_result(player_url, VimeoIE, url_transparent=True, **episode_fields)
|
||||
|
||||
|
||||
class LaracastsIE(LaracastsBaseIE):
    """Extractor for a single Laracasts series episode."""
    IE_NAME = 'laracasts'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
        'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
        'info_dict': {
            'id': '922040563',
            'title': 'Hello, Laravel',
            'ext': 'mp4',
            'duration': 519,
            'date': '20240312',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
            'description': 'md5:ddd658bb241975871d236555657e1dd1',
            'season_number': 1,
            'season': 'Season 1',
            'episode_number': 1,
            'episode': 'Episode 1',
            'uploader': 'Laracasts',
            'uploader_id': 'user20182673',
            'uploader_url': 'https://vimeo.com/user20182673',
        },
        'expected_warnings': ['Failed to parse XML'],  # TODO: Remove when vimeo extractor is fixed
    }]

    def _real_extract(self, url):
        """Parse the `lesson` entry of the page props into an episode result."""
        display_id = self._match_id(url)
        props = self._get_prop_data(url, display_id)
        return self._parse_episode(props['lesson'])
|
||||
|
||||
|
||||
class LaracastsPlaylistIE(LaracastsBaseIE):
    """Extractor for a whole Laracasts series, returned as a playlist."""
    IE_NAME = 'laracasts:series'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
        'info_dict': {
            'title': '30 Days to Learn Laravel',
            'id': '210',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2',
            'duration': 30600.0,
            'modified_date': '20240511',
            'description': 'md5:27c260a1668a450984e8f901579912dd',
            'categories': ['Frameworks'],
            'tags': ['Laravel'],
            'display_id': '30-days-to-learn-laravel-11',
        },
        'playlist_count': 30,
    }]

    def _real_extract(self, url):
        """Build a playlist from the `series` entry of the page props."""
        display_id = self._match_id(url)
        series = self._get_prop_data(url, display_id)['series']

        series_fields = traverse_obj(series, {
            'title': ('title', {str}),
            'id': ('id', {int}, {str_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
            'duration': ('runTime', {parse_duration}),
            'categories': ('taxonomy', 'name', {str}, {lambda name: name and [name]}),
            'tags': ('topics', ..., 'name', {str}),
            'modified_date': ('lastUpdated', {unified_strdate}),
        })
        metadata = {'display_id': display_id, **series_fields}

        # Only episodes carrying a truthy `vimeoId` are turned into entries
        episode_path = ('chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode})
        entries = traverse_obj(series, episode_path)
        return self.playlist_result(entries, **metadata)
|
@ -1,188 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
    """Shared helpers for the Microsoft Virtual Academy extractors."""

    def _extract_base_url(self, course_id, display_id):
        """Return the JSON response of the products API for `course_id` (used by callers as the base URL)."""
        api_url = f'https://api-mlxprod.microsoft.com/services/products/anonymous/{course_id}'
        return self._download_json(api_url, display_id, 'Downloading course base URL')

    def _extract_chapter_and_title(self, title):
        """Split a '<number> | <title>' string into (chapter number, title).

        Returns (None, None) for an empty title and (None, title) when the
        pattern does not match.
        """
        if not title:
            return None, None
        match = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
        if not match:
            return None, title
        return int(match.group('chapter')), match.group('title')
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
    """Extractor for a single Microsoft Virtual Academy video."""
    IE_NAME = 'mva'
    IE_DESC = 'Microsoft Virtual Academy videos'
    _VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)'

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
        'info_dict': {
            'id': 'gfVXISmEB_6804984382',
            'ext': 'mp4',
            'title': 'Course Introduction',
            'formats': 'mincount:3',
            'subtitles': {
                'en': [{
                    'ext': 'ttml',
                }],
            },
        },
    }, {
        'url': 'mva:11788:gfVXISmEB_6804984382',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the video settings XML and collect formats and subtitles from it."""
        url, smuggled_data = unsmuggle_url(url, {})

        course_id, video_id = self._match_valid_url(url).group('course_id', 'id')

        # A base URL smuggled into the URL saves the extra API request
        base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)

        settings = self._download_xml(
            f'{base_url}/content/content_{video_id}/videosettings.xml?v=1',
            video_id, 'Downloading video settings XML')

        raw_title = xpath_text(settings, './/Title', 'title', fatal=True)
        _, title = self._extract_chapter_and_title(raw_title)

        formats = []
        for media_sources in settings.findall('.//MediaSources'):
            is_smooth_streaming = media_sources.get('videoType') == 'smoothstreaming'
            for media_source in media_sources.findall('./MediaSource'):
                video_url = media_source.text
                if not (video_url and video_url.startswith('http')):
                    continue
                if is_smooth_streaming:
                    formats.extend(self._extract_ism_formats(
                        video_url, video_id, 'mss', fatal=False))
                    continue

                video_mode = media_source.get('videoMode')
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]$', video_mode or '', 'height', default=None))

                # `codec` is either 'acodec,vcodec' or a lone video codec
                acodec = vcodec = None
                codec = media_source.get('codec')
                codec_parts = codec.split(',') if codec else []
                if len(codec_parts) == 2:
                    acodec, vcodec = codec_parts
                elif len(codec_parts) == 1:
                    vcodec = codec_parts[0]

                formats.append({
                    'url': video_url,
                    'format_id': video_mode,
                    'height': height,
                    'acodec': acodec,
                    'vcodec': vcodec,
                })

        subtitles = {}
        for marker_source in settings.findall('.//MarkerResourceSource'):
            subtitle_path = marker_source.text
            if subtitle_path:
                subtitles.setdefault('en', []).append({
                    'url': f'{base_url}/{subtitle_path}',
                    'ext': marker_source.get('type'),
                })

        return {
            'id': video_id,
            'title': title,
            'subtitles': subtitles,
            'formats': formats,
        }
|
||||
|
||||
|
||||
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
    """Extractor for a Microsoft Virtual Academy course, returned as a playlist."""
    IE_NAME = 'mva:course'
    IE_DESC = 'Microsoft Virtual Academy courses'
    _VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'info_dict': {
            'id': '11788',
            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
        },
        'playlist_count': 36,
    }, {
        # with emphasized chapters
        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
        'info_dict': {
            'id': '16335',
            'title': 'Developing Windows 10 Games with Construct 2',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'only_matching': True,
    }, {
        'url': 'mva:course:11788',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to MicrosoftVirtualAcademyIE for URLs it already matches."""
        if MicrosoftVirtualAcademyIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Read the course manifest and emit one url_transparent entry per video item."""
        course_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        base_url = self._extract_base_url(course_id, display_id)

        manifest = self._download_json(
            f'{base_url}/imsmanifestlite.json',
            display_id, 'Downloading course manifest JSON')['manifest']

        organization = manifest['organizations']['organization'][0]

        entries = []
        for chapter in organization['item']:
            chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
            chapter_id = chapter.get('@identifier')
            for lesson in chapter.get('item', []):
                lesson_id = lesson.get('@identifier')
                if not lesson_id:
                    continue
                lesson_meta = lesson.get('resource', {}).get('metadata') or {}
                # Only items whose metadata marks them as 'Video' become entries
                if lesson_meta.get('learningresourcetype') != 'Video':
                    continue
                _, lesson_title = self._extract_chapter_and_title(lesson.get('title'))
                entries.append({
                    '_type': 'url_transparent',
                    # Smuggle the base URL so the video extractor can skip the API request
                    'url': smuggle_url(
                        f'mva:{course_id}:{lesson_id}', {'base_url': base_url}),
                    'title': lesson_title,
                    'description': lesson_meta.get('description'),
                    'duration': parse_duration(lesson_meta.get('duration')),
                    'chapter': chapter_title,
                    'chapter_number': chapter_number,
                    'chapter_id': chapter_id,
                })

        course_title = organization.get('title') or manifest.get('metadata', {}).get('title')

        return self.playlist_result(entries, course_id, course_title)
|
@ -0,0 +1,198 @@
|
||||
import base64
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
qualities,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
unsmuggle_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class SproutVideoIE(InfoExtractor):
    """Extractor for videos.sproutvideo.com embed URLs."""
    _NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+'
    _VALID_URL = rf'https?:{_NO_SCHEME_RE}'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']']
    _TESTS = [{
        'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
        'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'duration': 576,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27',
        'md5': 'cebae5cf558cca83271917cf4ec03f26',
        'info_dict': {
            'id': 'a79fdcb21f1be2c62e',
            'ext': 'mp4',
            'title': 'HS_01_Live Stream 2023-01-14 10:00',
            'duration': 703,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        # http formats 'sd' and 'hd' are available
        'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
        'md5': 'f368c78df07e78a749508b221528672c',
        'info_dict': {
            'id': '119cd6bc1a18e6cd98',
            'ext': 'mp4',
            'title': '3. Updating your Partner details',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'duration': 60,
        },
        'params': {'format': 'hd'},
    }, {
        # subtitles
        'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd',
        'md5': '7f6798f037d7a3e3e07e67959de68fc6',
        'info_dict': {
            'id': '119dd8ba121ee0cc98',
            'ext': 'mp4',
            'title': 'Recipients Setup - Domestic Wire Only',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'duration': 77,
            'subtitles': {'en': 'count:1'},
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'duration': 576,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]
    _M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8'
    _QUALITIES = ('hd', 'uhd', 'source')  # Exclude 'sd' to prioritize hls formats above it

    @staticmethod
    def _policy_to_qs(policy, signature_key, as_string=False):
        """Build query parameters from `policy['signatures'][signature_key]` plus the session ID.

        A leading 'CloudFront-' is stripped from each signature key. Returns a
        urlencoded string when `as_string` is true, otherwise a dict.
        """
        signatures = policy['signatures'][signature_key]
        query = {remove_start(name, 'CloudFront-'): value for name, value in signatures.items()}
        query['sessionID'] = policy['sessionID']
        if as_string:
            return urllib.parse.urlencode(query, doseq=True)
        return query

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield embed URLs found in `webpage`, with the embedding page smuggled in as referer."""
        for embed_url in super()._extract_embed_urls(url, webpage):
            # Scheme-relative embeds are completed with https
            full_url = f'https:{embed_url}' if embed_url.startswith('//') else embed_url
            yield smuggle_url(full_url, {'referer': url})

    def _real_extract(self, url):
        """Decode the base64 `dat` JSON from the embed page and assemble formats and subtitles."""
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        page_headers = traverse_obj(smuggled_data, {'Referer': 'referer'})
        webpage = self._download_webpage(url, video_id, headers=page_headers)
        data = self._search_json(
            r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
            end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())

        formats, subtitles = [], {}
        headers = {
            'Accept': '*/*',
            'Origin': 'https://videos.sproutvideo.com',
            'Referer': url,
        }

        # HLS extraction is fatal; only attempt it if the JSON data says it's available
        if traverse_obj(data, 'hls'):
            manifest_query = self._policy_to_qs(data, 'm')
            fragment_query = self._policy_to_qs(data, 't', as_string=True)
            key_query = self._policy_to_qs(data, 'k', as_string=True)

            m3u8_url = self._M3U8_URL_TMPL.format(**data)
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4',
                m3u8_id='hls', headers=headers, query=manifest_query))
            # `formats` holds only the HLS formats at this point
            for hls_format in formats:
                hls_format.update({
                    'url': update_url_query(hls_format['url'], manifest_query),
                    'extra_param_to_segment_url': fragment_query,
                    'extra_param_to_key_url': key_query,
                })

        downloads = traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1])))
        if downloads:
            quality = qualities(self._QUALITIES)
            acodec = 'none' if data.get('has_audio') is False else None
            for format_id, format_url in downloads:
                formats.append({
                    'format_id': str(format_id),
                    'url': format_url,
                    'ext': 'mp4',
                    'quality': quality(format_id),
                    'acodec': acodec,
                })

        subtitle_tracks = traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src'])))
        for track in subtitle_tracks:
            language = track.get('srclang', 'en')
            subtitles.setdefault(language, []).append({
                'url': track['src'],
            })

        metadata = traverse_obj(data, {
            'title': ('title', {str}),
            'duration': ('duration', {int_or_none}),
            'thumbnail': ('posterframe_url', {url_or_none}),
        })
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': headers,
            **metadata,
        }
|
||||
|
||||
|
||||
class VidsIoIE(InfoExtractor):
    """Extractor for *.vids.io video pages, which embed a SproutVideo player."""
    IE_NAME = 'vids.io'
    _VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)'
    _TESTS = [{
        'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming',
        'md5': '9bbbb2c0c0739eb163b80f87b8d77c9e',
        'info_dict': {
            'id': '799cd8b11c10efc1f0',
            'ext': 'mp4',
            'title': 'How to Video: Live Streaming',
            'duration': 2787,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Resolve the page to its SproutVideo embed, submitting the video password on a 403.

        Raises ExtractorError when a password is required but missing or
        wrong, or when no embed URL is found.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=403)

        if urlh.status == 403:
            password = self.get_param('videopassword')
            if not password:
                raise ExtractorError(
                    'This video is password-protected; use the --video-password option', expected=True)
            # Submit the password together with the form's hidden inputs
            form_data = urlencode_postdata({
                'password': password,
                **self._hidden_inputs(webpage),
            })
            try:
                webpage = self._download_webpage(
                    url, display_id, 'Submitting video password', data=form_data)
                # Requests with user's session cookie `_sproutvideo_session` are now authorized
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                    raise ExtractorError('Incorrect password', expected=True)
                raise

        embed_url = next(SproutVideoIE._extract_embed_urls(url, webpage), None)
        if not embed_url:
            raise ExtractorError('Unable to extract any SproutVideo embed url')
        return self.url_result(embed_url, SproutVideoIE, video_id)
|
@ -0,0 +1,426 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
mimetype2ext,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class VidyardBaseIE(InfoExtractor):
    """Shared helpers for extractors that read Vidyard player JSON."""
    _HEADERS = {'Referer': 'https://play.vidyard.com/'}

    def _get_formats_and_subtitles(self, sources, video_id):
        """Build formats and subtitles from a player `sources` mapping.

        @param sources   Mapping of source type to a list of source dicts.
                         It is read only and never modified.
        @param video_id  ID used for the manifest requests
        @returns         (formats, subtitles)
        """
        formats, subtitles = [], {}

        def add_hls_fmts_and_subs(m3u8_url):
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, video_id, 'mp4', m3u8_id='hls', headers=self._HEADERS, fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        # Read the HLS list without popping it, so the caller's dict is left intact
        hls_list = sources.get('hls') if isinstance(sources, dict) else None
        if master_m3u8_url := traverse_obj(
                hls_list, (lambda _, v: v['profile'] == 'auto', 'url', {url_or_none}, any)):
            add_hls_fmts_and_subs(master_m3u8_url)
        if not formats:  # These are duplicate and unnecessary requests if we got 'auto' hls fmts
            for variant_m3u8_url in traverse_obj(hls_list, (..., 'url', {url_or_none})):
                add_hls_fmts_and_subs(variant_m3u8_url)

        for source_type, source_list in traverse_obj(sources, ({dict.items}, ...)):
            if source_type == 'hls':
                # Already handled above through the m3u8 manifests
                continue
            for source in traverse_obj(source_list, lambda _, v: url_or_none(v['url'])):
                profile = source.get('profile')
                formats.append({
                    'url': source['url'],
                    'ext': mimetype2ext(source.get('mimeType'), default=None),
                    'format_id': join_nonempty('http', source_type, profile),
                    **parse_resolution(profile),
                })

        self._remove_duplicate_formats(formats)
        return formats, subtitles

    def _get_direct_subtitles(self, caption_json):
        """Return a subtitles dict from caption entries that have a valid `vttUrl`.

        Entries without a language are grouped under 'und'.
        """
        subs = {}
        for caption in traverse_obj(caption_json, lambda _, v: url_or_none(v['vttUrl'])):
            subs.setdefault(caption.get('language') or 'und', []).append({
                'url': caption['vttUrl'],
                'name': caption.get('name'),
            })

        return subs

    def _fetch_video_json(self, video_id):
        """Download the player JSON for `video_id` and return its `payload`."""
        return self._download_json(
            f'https://play.vidyard.com/player/{video_id}.json', video_id)['payload']

    def _process_video_json(self, json_data, video_id):
        """Convert one video's player JSON into an info dict.

        Subtitles from the manifests are merged with those listed under `captions`.
        """
        formats, subtitles = self._get_formats_and_subtitles(json_data['sources'], video_id)
        self._merge_subtitles(self._get_direct_subtitles(json_data.get('captions')), target=subtitles)

        return {
            **traverse_obj(json_data, {
                'id': ('facadeUuid', {str}),
                'display_id': ('videoId', {int}, {str_or_none}),
                'title': ('name', {str}),
                'description': ('description', {str}, {unescapeHTML}, {lambda x: x or None}),
                # Prefer the millisecond value, falling back to whole seconds
                'duration': ((
                    ('milliseconds', {functools.partial(float_or_none, scale=1000)}),
                    ('seconds', {int_or_none})), any),
                'thumbnails': ('thumbnailUrls', ('small', 'normal'), {'url': {url_or_none}}),
                'tags': ('tags', ..., 'name', {str}),
            }),
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': self._HEADERS,
        }
|
||||
|
||||
|
||||
class VidyardIE(VidyardBaseIE):
    # Matches hub/share "watch" pages, embed/share "share" pages and play.vidyard.com player URLs
    _VALID_URL = [
        r'https?://[\w-]+(?:\.hubs)?\.vidyard\.com/watch/(?P<id>[\w-]+)',
        r'https?://(?:embed|share)\.vidyard\.com/share/(?P<id>[\w-]+)',
        r'https?://play\.vidyard\.com/(?:player/)?(?P<id>[\w-]+)',
    ]
    _EMBED_REGEX = [r'<iframe[^>]* src=["\'](?P<url>(?:https?:)?//play\.vidyard\.com/[\w-]+)']
    _TESTS = [{
        'url': 'https://vyexample03.hubs.vidyard.com/watch/oTDMPlUv--51Th455G5u7Q',
        'info_dict': {
            'id': 'oTDMPlUv--51Th455G5u7Q',
            'display_id': '50347',
            'ext': 'mp4',
            'title': 'Homepage Video',
            'description': 'Look I changed the description.',
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
            'duration': 99,
            'tags': ['these', 'are', 'all', 'tags'],
        },
    }, {
        'url': 'https://share.vidyard.com/watch/PaQzDAT1h8JqB8ivEu2j6Y?',
        'info_dict': {
            'id': 'PaQzDAT1h8JqB8ivEu2j6Y',
            'display_id': '9281024',
            'ext': 'mp4',
            'title': 'Inline Embed',
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
            'duration': 41.186,
        },
    }, {
        'url': 'https://embed.vidyard.com/share/oTDMPlUv--51Th455G5u7Q',
        'info_dict': {
            'id': 'oTDMPlUv--51Th455G5u7Q',
            'display_id': '50347',
            'ext': 'mp4',
            'title': 'Homepage Video',
            'description': 'Look I changed the description.',
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/50347/OUPa5LTKV46849sLYngMqQ_small.jpg',
            'duration': 99,
            'tags': ['these', 'are', 'all', 'tags'],
        },
    }, {
        # First video from playlist below
        'url': 'https://embed.vidyard.com/share/SyStyHtYujcBHe5PkZc5DL',
        'info_dict': {
            'id': 'SyStyHtYujcBHe5PkZc5DL',
            'display_id': '41974005',
            'ext': 'mp4',
            'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
            'description': r're:In this video, you will learn how to prepare the frame.+',
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
            'duration': 258.666,
        },
    }, {
        # Playlist
        'url': 'https://thelink.hubs.vidyard.com/watch/pwu7pCYWSwAnPxs8nDoFrE',
        'info_dict': {
            'id': 'pwu7pCYWSwAnPxs8nDoFrE',
            'title': 'PLAYLIST - Palm Beach Shutters- Bi-Fold Track System Installation',
            'entries': [{
                'id': 'SyStyHtYujcBHe5PkZc5DL',
                'display_id': '41974005',
                'ext': 'mp4',
                'title': 'Prepare the Frame and Track for Palm Beach Polysatin Shutters With BiFold Track',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/41974005/IJw7oCaJcF1h7WWu3OVZ8A_small.png',
                'duration': 258.666,
            }, {
                'id': '1Fw4B84jZTXLXWqkE71RiM',
                'display_id': '5861113',
                'ext': 'mp4',
                'title': 'Palm Beach - Bi-Fold Track System "Frame Installation"',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861113/29CJ54s5g1_aP38zkKLHew_small.jpg',
                'duration': 167.858,
            }, {
                'id': 'DqP3wBvLXSpxrcqpT5kEeo',
                'display_id': '41976334',
                'ext': 'mp4',
                'title': 'Install the Track for Palm Beach Polysatin Shutters With BiFold Track',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861090/RwG2VaTylUa6KhSTED1r1Q_small.png',
                'duration': 94.229,
            }, {
                'id': 'opfybfxpzQArxqtQYB6oBU',
                'display_id': '41976364',
                'ext': 'mp4',
                'title': 'Install the Panel for Palm Beach Polysatin Shutters With BiFold Track',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860926/JIOaJR08dM4QgXi_iQ2zGA_small.png',
                'duration': 191.467,
            }, {
                'id': 'rWrXvkbTNNaNqD6189HJya',
                'display_id': '41976382',
                'ext': 'mp4',
                'title': 'Adjust the Panels for Palm Beach Polysatin Shutters With BiFold Track',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/5860687/CwHxBv4UudAhOh43FVB4tw_small.png',
                'duration': 138.155,
            }, {
                'id': 'eYPTB521MZ9TPEArSethQ5',
                'display_id': '41976409',
                'ext': 'mp4',
                'title': 'Assemble and Install the Valance for Palm Beach Polysatin Shutters With BiFold Track',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/5861425/0y68qlMU4O5VKU7bJ8i_AA_small.png',
                'duration': 148.224,
            }],
        },
        'playlist_count': 6,
    }, {
        # Non hubs.vidyard.com playlist
        'url': 'https://salesforce.vidyard.com/watch/d4vqPjs7Q5EzVEis5QT3jd',
        'info_dict': {
            'id': 'd4vqPjs7Q5EzVEis5QT3jd',
            'title': 'How To: Service Cloud: Import External Content in Lightning Knowledge',
            'entries': [{
                'id': 'mcjDpSZir2iSttbvFkx6Rv',
                'display_id': '29479036',
                'ext': 'mp4',
                'title': 'Welcome to this Expert Coaching Series',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/ouyQi9WuwyiOupChUWNmjQ/7170d3485ba602e012df05_small.jpg',
                'duration': 38.205,
            }, {
                'id': '84bPYwpg243G6xYEfJdYw9',
                'display_id': '21820704',
                'ext': 'mp4',
                'title': 'Chapter 1 - Title + Agenda',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/HFPN0ZgQq4Ow8BghGcQSow/bfaa30123c8f6601e7d7f2_small.jpg',
                'duration': 98.016,
            }, {
                'id': 'nP17fMuvA66buVHUrzqjTi',
                'display_id': '21820707',
                'ext': 'mp4',
                'title': 'Chapter 2 - Import Options',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/rGRIF5nFjPI9OOA2qJ_Dbg/86a8d02bfec9a566845dd4_small.jpg',
                'duration': 199.136,
            }, {
                'id': 'm54EcwXdpA5gDBH5rgCYoV',
                'display_id': '21820710',
                'ext': 'mp4',
                'title': 'Chapter 3 - Importing Article Translations',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/IVX4XR8zpSsiNIHx45kz-A/1ccbf8a29a33856d06b3ed_small.jpg',
                'duration': 184.352,
            }, {
                'id': 'j4nzS42oq4hE9oRV73w3eQ',
                'display_id': '21820716',
                'ext': 'mp4',
                'title': 'Chapter 4 - Best Practices',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/BtrRrQpRDLbA4AT95YQyog/1f1e6b8e7fdc3fa95ec8d3_small.jpg',
                'duration': 296.960,
            }, {
                'id': 'y28PYfW5pftvers9PXzisC',
                'display_id': '21820727',
                'ext': 'mp4',
                'title': 'Chapter 5 - Migration Steps',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/K2CdQOXDfLcrVTF60r0bdw/a09239ada28b6ffce12b1f_small.jpg',
                'duration': 620.640,
            }, {
                'id': 'YWU1eQxYvhj29SjYoPw5jH',
                'display_id': '21820733',
                'ext': 'mp4',
                'title': 'Chapter 6 - Demo',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/rsmhP-cO8dAa8ilvFGCX0g/7911ef415167cd14032068_small.jpg',
                'duration': 631.456,
            }, {
                'id': 'nmEvVqpwdJUgb74zKsLGxn',
                'display_id': '29479037',
                'ext': 'mp4',
                'title': 'Schedule Your Follow-Up',
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/Rtwc7X4PEkF4Ae5kHi-Jvw/174ebed3f34227b1ffa1d0_small.jpg',
                'duration': 33.608,
            }],
        },
        'playlist_count': 8,
    }, {
        # URL of iframe embed src
        'url': 'https://play.vidyard.com/iDqTwWGrd36vaLuaCY3nTs.html',
        'info_dict': {
            'id': 'iDqTwWGrd36vaLuaCY3nTs',
            'display_id': '9281009',
            'ext': 'mp4',
            'title': 'Lightbox Embed',
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/spacer.gif',
            'duration': 39.035,
        },
    }, {
        # Player JSON URL
        'url': 'https://play.vidyard.com/player/7GAApnNNbcZZ46k6JqJQSh.json?disable_analytics=0',
        'info_dict': {
            'id': '7GAApnNNbcZZ46k6JqJQSh',
            'display_id': '820026',
            'ext': 'mp4',
            'title': 'The Art of Storytelling: How to Deliver Your Brand Story with Content & Social',
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/MhbE-5sEFQu4x3fI6FkNlA/41eb5717c557cd19456910_small.jpg',
            'duration': 2153.013,
            'tags': ['Summit2017'],
        },
    }, {
        'url': 'http://share.vidyard.com/share/diYeo6YR2yiGgL8odvS8Ri',
        'only_matching': True,
    }, {
        'url': 'https://play.vidyard.com/FFlz3ZpxhIfKQ1fd9DAryA',
        'only_matching': True,
    }, {
        'url': 'https://play.vidyard.com/qhMAu5A76GZVrFzOPgSf9A/type/standalone',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        # URL containing inline/lightbox embedded video
        'url': 'https://resources.altium.com/p/2-the-extreme-importance-of-pc-board-stack-up',
        'info_dict': {
            'id': 'GDx1oXrFWj4XHbipfoXaMn',
            'display_id': '3225198',
            'ext': 'mp4',
            'title': 'The Extreme Importance of PC Board Stack Up',
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/73_Q3_hBexWX7Og1sae6cg/9998fa4faec921439e2c04_small.jpg',
            'duration': 3422.742,
        },
    }, {
        # <script ... id="vidyard_embed_code_DXx2sW4WaLA6hTdGFz7ja8" src="//play.vidyard.com/DXx2sW4WaLA6hTdGFz7ja8.js?
        'url': 'http://videos.vivint.com/watch/DXx2sW4WaLA6hTdGFz7ja8',
        'info_dict': {
            'id': 'DXx2sW4WaLA6hTdGFz7ja8',
            'display_id': '2746529',
            'ext': 'mp4',
            'title': 'How To Powercycle the Smart Hub Panel',
            'duration': 30.613,
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/_-6cw8xQUJ3qiCs_JENc_A/b21d7a5e47967f49399d30_small.jpg',
        },
    }, {
        # <script id="vidyard_embed_code_MIBHhiLVTxga7wqLsuoDjQ" src="//embed.vidyard.com/embed/MIBHhiLVTxga7wqLsuoDjQ/inline?v=2.1">
        'url': 'https://www.babypips.com/learn/forex/introduction-to-metatrader4',
        'info_dict': {
            'id': 'MIBHhiLVTxga7wqLsuoDjQ',
            'display_id': '20291',
            'ext': 'mp4',
            'title': 'Lesson 1 - Opening an MT4 Account',
            'description': 'Never heard of MetaTrader4? Here\'s the 411 on the popular trading platform!',
            'duration': 168,
            'thumbnail': 'https://cdn.vidyard.com/thumbnails/20291/IM-G2WXQR9VBLl2Cmzvftg_small.jpg',
        },
    }, {
        # <iframe ... src="//play.vidyard.com/d61w8EQoZv1LDuPxDkQP2Q/type/background?preview=1"
        'url': 'https://www.avaya.com/en/',
        'info_dict': {
            # These values come from the generic extractor and don't matter
            'id': str,
            'title': str,
            'age_limit': 0,
            'upload_date': str,
            'description': str,
            'thumbnail': str,
            'timestamp': float,
        },
        'playlist': [{
            'info_dict': {
                'id': 'd61w8EQoZv1LDuPxDkQP2Q',
                'display_id': '42456529',
                'ext': 'mp4',
                'title': 'GettyImages-1027',
                'duration': 6.0,
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/42061563/p6bY08d2N4e4IDz-7J4_wkgsPq3-qgcx_small.jpg',
            },
        }, {
            'info_dict': {
                'id': 'VAsYDi7eiqZRbHodUA2meC',
                'display_id': '42456569',
                'ext': 'mp4',
                'title': 'GettyImages-1325598833',
                'duration': 6.083,
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/42052358/y3qrbDpn_2quWr_5XBi7yzS3UvEI__ZM_small.jpg',
            },
        }],
        'playlist_count': 2,
    }, {
        # <div class="vidyard-player-embed" data-uuid="vpCWTVHw3qrciLtVY94YkS"
        'url': 'https://www.gogoair.com/',
        'info_dict': {
            # These values come from the generic extractor and don't matter
            'id': str,
            'title': str,
            'description': str,
            'age_limit': 0,
        },
        'playlist': [{
            'info_dict': {
                'id': 'vpCWTVHw3qrciLtVY94YkS',
                'display_id': '40780699',
                'ext': 'mp4',
                'title': 'Upgrade to AVANCE 100% worth it - Jason Talley, Owner and Pilot, Testimonial',
                'description': 'md5:f609824839439a51990cef55ffc472aa',
                'duration': 70.737,
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/40780699/KzjfYZz5MZl2gHF_e-4i2c6ib1cLDweQ_small.jpg',
            },
        }, {
            'info_dict': {
                'id': 'xAmV9AsLbnitCw35paLBD8',
                'display_id': '31130867',
                'ext': 'mp4',
                'title': 'Brad Keselowski goes faster with Gogo AVANCE inflight Wi-Fi',
                'duration': 132.565,
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/31130867/HknyDtLdm2Eih9JZ4A5XLjhfBX_6HRw5_small.jpg',
            },
        }, {
            'info_dict': {
                'id': 'RkkrFRNxfP79nwCQavecpF',
                'display_id': '39009815',
                'ext': 'mp4',
                'title': 'Live Demo of Gogo Galileo',
                'description': 'md5:e2df497236f4e12c3fef8b392b5f23e0',
                'duration': 112.128,
                'thumbnail': 'https://cdn.vidyard.com/thumbnails/38144873/CWLlxfUbJ4Gh0ThbUum89IsEM4yupzMb_small.jpg',
            },
        }],
        'playlist_count': 3,
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # URLs from the parent implementation may be protocol-relative; give those a scheme
        for found_url in super()._extract_embed_urls(url, webpage):
            yield f'https:{found_url}' if found_url.startswith('//') else found_url

        # Inline/lightbox embeds: <img>/<div> tags carrying the "vidyard-player-embed" class
        for element_html, _quote in re.findall(
                r'(<(?:img|div)[^>]* class=(["\'])(?:[^>"\']* )?vidyard-player-embed(?: [^>"\']*)?\2[^>]+>)', webpage):
            video_id = extract_attributes(element_html).get('data-uuid')
            if video_id:
                yield f'https://play.vidyard.com/{video_id}'

        # <script> tags whose id attribute is "vidyard_embed_code_<id>"
        script_embed_ids = re.findall(r'<script[^>]* id=["\']vidyard_embed_code_([\w-]+)["\']', webpage)
        for script_embed_id in script_embed_ids:
            yield f'https://play.vidyard.com/{script_embed_id}'

    def _real_extract(self, url):
        player_id = self._match_id(url)
        player_json = self._fetch_video_json(player_id)
        chapters = player_json['chapters']

        # Anything other than exactly one chapter is returned as a playlist
        if len(chapters) != 1:
            entries = [self._process_video_json(chapter, player_id) for chapter in chapters]
            return self.playlist_result(
                entries, str(player_json['playerUuid']), player_json.get('name'))

        return self._process_video_json(chapters[0], player_id)
|
@ -0,0 +1,108 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes, get_element_html_by_class, remove_start
|
||||
|
||||
|
||||
class VTVGoIE(InfoExtractor):
    # Matches kho-video / tin-tuc pages and digital/detail.php?content_id=... URLs on vtvgo.vn
    _VALID_URL = [
        r'https?://(?:www\.)?vtvgo\.vn/(kho-video|tin-tuc)/[\w.-]*?(?P<id>\d+)(?:\.[a-z]+|/)?(?:$|[?#])',
        r'https?://(?:www\.)?vtvgo\.vn/digital/detail\.php\?(?:[^#]+&)?content_id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'https://vtvgo.vn/kho-video/bep-vtv-vit-chao-rieng-so-24-888456.html',
        'info_dict': {
            'id': '888456',
            'ext': 'mp4',
            'title': 'Bếp VTV | Vịt chao riềng | Số 24',
            'description': 'md5:2b4e93ec2b954304170d32be288ce2c8',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20230201/VIT-CHAO-RIENG_VTV_638108894672812459.jpg',
        },
    }, {
        'url': 'https://vtvgo.vn/tin-tuc/hot-search-1-zlife-khong-ngo-toi-phai-khong-862074',
        'info_dict': {
            'id': '862074',
            'ext': 'mp4',
            'title': 'Hot Search #1 | Zlife | Không ngờ tới phải không? ',
            'description': 'md5:e967d0e2efbbebbee8814a55799b4d0f',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20220504/6b9a8552-e71c-46ce-bc9d-50c9bb506f9c.jpeg',
        },
    }, {
        'url': 'https://vtvgo.vn/kho-video/918311.html',
        'info_dict': {
            'id': '918311',
            'title': 'Cà phê sáng | 05/02/2024 | Tái hiện hình ảnh Hà Nội xưa tại ngôi nhà di sản',
            'ext': 'mp4',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240205/0506_ca_phe_sang_638427226021318322.jpg',
            'description': 'md5:b121c67948f1ce58e6a036042fc14c1b',
        },
    }, {
        'url': 'https://vtvgo.vn/digital/detail.php?digital_id=168&content_id=918634',
        'info_dict': {
            'id': '918634',
            'ext': 'mp4',
            'title': 'Gặp nhau cuối năm | Táo quân 2024',
            'description': 'md5:a1c221e78e5954d29d49b2a11c20513c',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240210/d0f73369-8f03-4108-9edd-83d4bc3997b2.png',
        },
    }, {
        'url': 'https://vtvgo.vn/digital/detail.php?content_id=919358',
        'info_dict': {
            'id': '919358',
            'ext': 'mp4',
            'title': 'Chúng ta của 8 năm sau | Tập 45 | Dương có bằng chứng, nhân chứng vạch mặt ông Khiêm',
            'description': 'md5:16ff5208cac6585137f554472a4677f3',
            'thumbnail': 'https://vtvgo-images.vtvdigital.vn/images/20240221/550deff9-7736-4a0e-8b5d-33274d97cd7d.jpg',
        },
    }, {
        'url': 'https://vtvgo.vn/kho-video/888456',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        content_id = self._match_id(url)
        page = self._download_webpage(url, content_id)

        # The playlist URL appears either as `var link = "..."` or as the first argument of addPlayer(...)
        playlist_url = self._search_regex(
            r'(?:var\s+link\s*=\s*|addPlayer\()["\'](https://[^"\']+/index\.m3u8)["\']',
            page, 'm3u8 url')

        info = {'id': content_id}
        # Metadata is taken from the page's OpenGraph tags; each field is optional
        info['title'] = self._og_search_title(page, default=None)
        info['description'] = self._og_search_description(page, default=None)
        info['thumbnail'] = self._og_search_thumbnail(page, default=None)
        info['formats'] = self._extract_m3u8_formats(playlist_url, content_id, 'mp4')
        return info
|
||||
|
||||
|
||||
class VTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?vtv\.vn/video/[\w-]*?(?P<id>\d+)\.htm'
    _TESTS = [{
        'url': 'https://vtv.vn/video/thoi-su-20h-vtv1-12-6-2024-680411.htm',
        'info_dict': {
            'id': '680411',
            'ext': 'mp4',
            'title': 'Thời sự 20h VTV1 - 12/6/2024 - Video đã phát trên VTV1 | VTV.VN',
            'thumbnail': 'https://cdn-images.vtv.vn/zoom/600_315/66349b6076cb4dee98746cf1/2024/06/12/thumb/1206-ts-20h-02929741475480320806760.mp4/thumb0.jpg',
        },
    }, {
        'url': 'https://vtv.vn/video/zlife-1-khong-ngo-toi-phai-khong-vtv24-560248.htm',
        'info_dict': {
            'id': '560248',
            'ext': 'mp4',
            'title': 'ZLife #1: Không ngờ tới phải không? | VTV24 - Video đã phát trên VTV-NEWS | VTV.VN',
            'description': 'Ai đứng sau vụ việc thay đổi ảnh đại diện trên các trang mạng xã hội của VTV Digital tối 2/5?',
            'thumbnail': 'https://video-thumbs.mediacdn.vn/zoom/600_315/vtv/2022/5/13/t67s6btf3ji-16524555726231894427334.jpg',
        },
    }]

    def _real_extract(self, url):
        content_id = self._match_id(url)
        page = self._download_webpage(url, content_id)

        # Locate the 'VCSortableInPreviewMode' element inside the 'video-highlight-box' element;
        # its data-vid attribute carries the media path
        highlight_box_html = get_element_html_by_class('video-highlight-box', page)
        player_html = get_element_html_by_class('VCSortableInPreviewMode', highlight_box_html)
        media_path = extract_attributes(player_html)['data-vid']

        # Drop the leading "vtv.mediacdn.vn/" (if present) before building the playlist URL
        relative_path = remove_start(media_path, "vtv.mediacdn.vn/")
        playlist_url = f'https://cdn-videos.vtv.vn/{relative_path}/master.m3u8'

        info = {'id': content_id}
        # Metadata is taken from the page's OpenGraph tags; each field is optional
        info['title'] = self._og_search_title(page, default=None)
        info['description'] = self._og_search_description(page, default=None)
        info['thumbnail'] = self._og_search_thumbnail(page, default=None)
        info['formats'] = self._extract_m3u8_formats(playlist_url, content_id, 'mp4')
        return info
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue