mirror of https://github.com/yt-dlp/yt-dlp
Merge branch 'yt-dlp:master' into patch-1
commit
0806b5a266
@ -0,0 +1,96 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj
|
||||
)
|
||||
|
||||
|
||||
class AirTVIE(InfoExtractor):
    """Extractor for ``https://www.air.tv/watch?v=<id>`` pages.

    The video metadata is read from the page's Next.js data blob. Entries
    that carry a ``youtube_id`` are handed over to ``YoutubeIE``; all other
    entries are extracted from the sources listed in the blob.
    """
    _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
    _TESTS = [{
        # without youtube_id
        'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
        'info_dict': {
            'id': 'W87jcWleSn2hXZN47zJZsQ',
            'ext': 'mp4',
            'release_date': '20221003',
            'release_timestamp': 1664792603,
            'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
            'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
            'upload_date': '20221003',
            'duration': 151,
            'view_count': int,
            'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
            'timestamp': 1664792603,
        }
    }, {
        # with youtube_id
        'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
        'info_dict': {
            'id': '2ZTqmpee-bQ',
            'ext': 'mp4',
            'comment_count': int,
            'tags': 'count:11',
            'channel_follower_count': int,
            'like_count': int,
            'uploader': 'Newsflare',
            'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
            'availability': 'public',
            'title': 'Geese Chase Alligator Across Golf Course',
            'uploader_id': 'NewsflareBreaking',
            'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
            'description': 'md5:99b21d9cea59330149efbd9706e208f5',
            'age_limit': 0,
            'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
            'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
            'view_count': int,
            'categories': ['News & Politics'],
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'Newsflare',
            'duration': 37,
            'upload_date': '20180511',
        }
    }]

    def _get_formats_and_subtitle(self, json_data, video_id):
        """Build the format list and subtitle dict for one video entry.

        ``json_data`` is the per-video mapping from the Next.js data; its
        ``sources`` (or, failing that, ``sources_desktop``) value is expected
        to be a list of mappings with ``src`` and optionally ``type``.

        Returns a ``(formats, subtitles)`` tuple. Entries that are not
        mappings or that have no ``src`` are skipped.
        """
        formats, subtitles = [], {}
        for source in traverse_obj(json_data, 'sources', 'sources_desktop', ...) or []:
            # The trailing `...` alternative yields every value of json_data
            # when neither key is present, so entries may be arbitrary types
            src = source.get('src') if isinstance(source, dict) else None
            if not src:
                continue

            ext = determine_ext(src, mimetype2ext(source.get('type')))
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({'url': src, 'ext': ext})
        return formats, subtitles

    def _real_extract(self, url):
        """Extract the video at ``url``, or defer to ``YoutubeIE`` when the entry has a ``youtube_id``."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id]

        youtube_id = nextjs_json.get('youtube_id')
        if youtube_id:
            return self.url_result(
                f'https://www.youtube.com/watch?v={youtube_id}', YoutubeIE)

        formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id)
        return {
            'id': display_id,
            'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage),
            'formats': formats,
            'subtitles': subtitles,
            # Normalise an empty description to None
            'description': nextjs_json.get('description') or None,
            'duration': int_or_none(nextjs_json.get('duration')),
            'thumbnails': [
                {'url': thumbnail}
                for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))],
            # Two alternative top-level keys: `channel` first, then `channel_slug`
            'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'),
            'timestamp': parse_iso8601(nextjs_json.get('created')),
            'release_timestamp': parse_iso8601(nextjs_json.get('published')),
            'view_count': int_or_none(nextjs_json.get('views')),
        }
|
@ -1,31 +1,51 @@
|
||||
from .common import InfoExtractor
|
||||
from .uplynk import UplynkPreplayIE
|
||||
from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
    """Extractor for ``foxsports.com/watch/<slug>`` clip pages.

    The clip is looked up through the Fox ``sportsclip`` API and the actual
    media extraction is delegated to ``UplynkPreplayIE``; this extractor only
    contributes metadata on top of that result (``url_transparent``).
    """
    _VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.foxsports.com/watch/play-612168c6700004b',
        'info_dict': {
            'id': 'b72f5bd8658140baa5791bb676433733',
            'ext': 'mp4',
            'display_id': 'play-612168c6700004b',
            'title': 'md5:e0c4ecac3a1f25295b4fae22fb5c126a',
            'description': 'md5:371bc43609708ae2b9e1a939229762af',
            'uploader_id': '06b4a36349624051a9ba52ac3a91d268',
            'upload_date': '20221205',
            'timestamp': 1670262586,
            'duration': 31.7317,
            'thumbnail': r're:^https?://.*\.jpg$',
            'extra_param_to_segment_url': str,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the clip's Uplynk preplay URL."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # JSON-LD is optional (default={}); it only backs up/extends the API metadata
        json_ld = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
        data = self._download_json(
            f'https://api3.fox.com/v2.0/vodplayer/sportsclip/{video_id}',
            video_id, note='Downloading API JSON', headers={
                'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
            })
        # Issue a HEAD request to the API-provided URL and use the URL of the
        # response as the preplay URL
        preplay_url = self._request_webpage(
            HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl()

        return {
            '_type': 'url_transparent',
            'ie_key': UplynkPreplayIE.ie_key(),
            # The Origin value is smuggled along with the URL for the Uplynk extractor
            'url': smuggle_url(preplay_url, {'Origin': 'https://www.foxsports.com'}),
            'display_id': video_id,
            'title': data.get('name') or json_ld.get('title'),
            'description': data.get('description') or json_ld.get('description'),
            'duration': float_or_none(data.get('durationInSeconds')),
            'timestamp': json_ld.get('timestamp'),
            'thumbnails': json_ld.get('thumbnails'),
            # Archive id built from this extractor and the URL slug
            # NOTE(review): presumably keeps older download-archive entries valid - confirm
            '_old_archive_ids': [make_archive_id(self, video_id)],
        }
|
||||
|
@ -0,0 +1,43 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class OnePlacePodcastIE(InfoExtractor):
    """Extractor for oneplace.com ``.../listen/<slug>-<id>`` podcast episode pages.

    The episode is returned as a single audio-only MP3 whose URL is scraped
    from the page markup.
    """
    _VALID_URL = r'https?://www\.oneplace\.com/[\w]+/[^/]+/listen/[\w-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.oneplace.com/ministries/a-daily-walk/listen/living-in-the-last-days-part-2-958461.html',
        'info_dict': {
            'id': '958461',
            'ext': 'mp3',
            'title': 'Living in the Last Days Part 2 | A Daily Walk with John Randall',
            'description': 'md5:fbb8f1cf21447ac54ecaa2887fc20c6e',
        }
    }, {
        'url': 'https://www.oneplace.com/ministries/ankerberg-show/listen/ep-3-relying-on-the-constant-companionship-of-the-holy-spirit-part-2-922513.html',
        'info_dict': {
            'id': '922513',
            'ext': 'mp3',
            'description': 'md5:8b810b4349aa40a5d033b4536fe428e1',
            'title': 'md5:ce10f7d8d5ddcf485ed8905ef109659d',
        }
    }]

    def _real_extract(self, url):
        """Scrape the media URL, title and description from the episode page."""
        episode_id = self._match_id(url)
        page = self._download_webpage(url, episode_id)

        # Mandatory: no default is given, so a missing media URL is an error.
        # Two markup variants are tried in order.
        media_patterns = (
            r'mp3-url\s*=\s*"([^"]+)',
            r'<div[^>]+id\s*=\s*"player"[^>]+data-media-url\s*=\s*"(?P<media_url>[^"]+)',
        )
        media_url = self._search_regex(media_patterns, page, 'media url')

        # Optional: page heading first, then the og:title / title meta tags;
        # every pattern exposes its value through the `content` group
        title_patterns = (
            r'<div[^>]class\s*=\s*"details"[^>]+>[^<]<h2[^>]+>(?P<content>[^>]+)>',
            self._meta_regex('og:title'), self._meta_regex('title'),
        )
        title = self._html_search_regex(
            title_patterns, page, 'title', group='content', default=None)

        description = self._html_search_regex(
            r'<div[^>]+class="[^"]+epDesc"[^>]*>\s*(?P<desc>.+?)\s*</div>',
            page, 'description', default=None)

        return {
            'id': episode_id,
            'url': media_url,
            'ext': 'mp3',
            'vcodec': 'none',
            'title': title,
            'description': description,
        }
|
Loading…
Reference in New Issue