mirror of https://github.com/yt-dlp/yt-dlp
[cleanup] Remove broken extractors (#14305)
Closes #1466, Closes #2005, Closes #4897, Closes #5118, Closes #8489, Closes #13072
Authored by: bashonly
pull/12448/merge
parent
17bfaa53ed
commit
65e90aea29
@ -1,243 +0,0 @@
|
||||
import hashlib
|
||||
import hmac
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CrackleIE(InfoExtractor):
    """Extractor for videos hosted on crackle.com / sonycrackle.com.

    The media details endpoint is queried per country: first the country given
    via ``geo_bypass_country`` (if any), then a hard-coded list, and finally the
    country reported by the service's own geo-location endpoint.
    """
    _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TESTS = [{
        # Crackle is available in the United States and territories
        'url': 'https://www.crackle.com/thanksgiving/2510064',
        'info_dict': {
            'id': '2510064',
            'ext': 'mp4',
            'title': 'Touch Football',
            'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df',
            'duration': 1398,
            'view_count': int,
            'average_rating': 0,
            'age_limit': 17,
            'genre': 'Comedy',
            'creator': 'Daniel Powell',
            'artist': 'Chris Elliott, Amy Sedaris',
            'release_year': 2016,
            'series': 'Thanksgiving',
            'episode': 'Touch Football',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': [
            'Trying with a list of known countries',
        ],
    }, {
        'url': 'https://www.sonycrackle.com/thanksgiving/2510064',
        'only_matching': True,
    }]

    # Known dimensions for progressive HTTP files, keyed by the entry's 'Type'
    _MEDIA_FILE_SLOTS = {
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 768,
            'height': 432,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 480,
        },
    }

    def _download_json(self, url, *args, **kwargs):
        """Download JSON from the Crackle API with a signed Authorization header.

        Any ``headers`` supplied by the caller are kept, but ``Accept`` and
        ``Authorization`` are always set by this method.
        """
        # Authorization generation algorithm is reverse engineered from:
        # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
        timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
        signature = hmac.new(
            b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
        headers = {
            # Merge instead of passing headers twice, which would raise TypeError
            **(kwargs.pop('headers', None) or {}),
            'Accept': 'application/json',
            'Authorization': '|'.join([signature, timestamp, '117', '1']),
        }
        return super()._download_json(url, *args, headers=headers, **kwargs)

    def _real_extract(self, url):
        """Return the info dict for the video identified by ``url``.

        Raises ExtractorError when the API reports a non-zero message code or
        when no country yields any media data.
        """
        video_id = self._match_id(url)

        geo_bypass_country = self.get_param('geo_bypass_country', None)
        # The trailing '' is a placeholder that is replaced by the geo-located country
        countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
        num_countries = len(countries) - 1

        media = {}
        for num, country in enumerate(countries):
            if num == 1:  # start hard-coded list
                self.report_warning('%s. Trying with a list of known countries' % (
                    f'Unable to obtain video formats from {geo_bypass_country} API' if geo_bypass_country
                    else 'No country code was given using --geo-bypass-country'))
            elif num == num_countries:  # end of list
                geo_info = self._download_json(
                    'https://web-api-us.crackle.com/Service.svc/geo/country',
                    video_id, fatal=False, note='Downloading geo-location information from crackle API',
                    errnote='Unable to fetch geo-location information from crackle') or {}
                country = geo_info.get('CountryCode')
                if country is None:
                    continue
                self.to_screen(f'{self.IE_NAME} identified country as {country}')
                if country in countries:
                    self.to_screen(f'Downloading from {country} API was already attempted. Skipping...')
                    continue

            # No --geo-bypass-country given: the first slot is None
            if country is None:
                continue
            try:
                media = self._download_json(
                    f'https://web-api-us.crackle.com/Service.svc/details/media/{video_id}/{country}?disableProtocols=true',
                    video_id, note=f'Downloading media JSON from {country} API',
                    errnote='Unable to download media JSON')
            except ExtractorError as err:
                # 401 means geo restriction, trying next country
                if isinstance(err.cause, HTTPError) and err.cause.status == 401:
                    continue
                raise

            # A response without 'status' is reported as an API error instead of crashing
            status = media.get('status') or {}
            if status.get('messageCode') != '0':
                raise ExtractorError(
                    '{} said: {} {} - {}'.format(
                        self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
                    expected=True)

            # Found video formats
            if isinstance(media.get('MediaURLs'), list):
                break

        ignore_no_formats = self.get_param('ignore_no_formats_error')

        if not media or (not media.get('MediaURLs') and not ignore_no_formats):
            raise ExtractorError(
                'Unable to access the crackle API. Try passing your country code '
                'to --geo-bypass-country. If it still does not work and the '
                'video is available in your country')
        title = media['Title']

        formats, subtitles = [], {}
        has_drm = False
        for media_url in media.get('MediaURLs') or []:
            if media_url.get('UseDRM'):
                has_drm = True
                format_url = url_or_none(media_url.get('DRMPath'))
            else:
                format_url = url_or_none(media_url.get('Path'))
            if not format_url:
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif ext == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    format_url, video_id, mpd_id='dash', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            elif format_url.endswith('.ism/Manifest'):
                fmts, subs = self._extract_ism_formats_and_subtitles(
                    format_url, video_id, ism_id='mss', fatal=False)
                formats.extend(fmts)
                subtitles = self._merge_subtitles(subtitles, subs)
            else:
                mfs_path = media_url.get('Type')
                mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
                if not mfs_info:
                    continue
                formats.append({
                    'url': format_url,
                    'format_id': 'http-' + mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
        if not formats and has_drm:
            self.report_drm(video_id)

        description = media.get('Description')
        duration = int_or_none(media.get(
            'DurationInSeconds')) or parse_duration(media.get('Duration'))
        view_count = int_or_none(media.get('CountViews'))
        average_rating = float_or_none(media.get('UserRating'))
        age_limit = parse_age_limit(media.get('Rating'))
        genre = media.get('Genre')
        release_year = int_or_none(media.get('ReleaseYear'))
        creator = media.get('Directors')
        artist = media.get('Cast')

        if media.get('MediaTypeDisplayValue') == 'Full Episode':
            series = media.get('ShowName')
            episode = title
            season_number = int_or_none(media.get('Season'))
            episode_number = int_or_none(media.get('Episode'))
        else:
            series = episode = season_number = episode_number = None

        cc_files = media.get('ClosedCaptionFiles')
        if isinstance(cc_files, list):
            for cc_file in cc_files:
                if not isinstance(cc_file, dict):
                    continue
                cc_url = url_or_none(cc_file.get('Path'))
                if not cc_url:
                    continue
                lang = cc_file.get('Locale') or 'en'
                subtitles.setdefault(lang, []).append({'url': cc_url})

        thumbnails = []
        images = media.get('Images')
        # The loop below needs a key -> URL mapping; the previous list check
        # could never succeed together with .items()
        if isinstance(images, dict):
            for image_key, image_url in images.items():
                mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
                if not mobj:
                    continue
                thumbnails.append({
                    'url': image_url,
                    'width': int(mobj.group(1)),
                    'height': int(mobj.group(2)),
                })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'average_rating': average_rating,
            'age_limit': age_limit,
            'genre': genre,
            'creator': creator,
            'artist': artist,
            'release_year': release_year,
            'series': series,
            'episode': episode,
            'season_number': season_number,
            'episode_number': episode_number,
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
        }
|
@ -1,180 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
update_url_query,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class CWTVIE(InfoExtractor):
    """Extractor for single CW videos addressed by a play/watch/guid UUID."""
    IE_NAME = 'cwtv'
    _VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch|guid)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
    _TESTS = [{
        'url': 'https://www.cwtv.com/shows/continuum/a-stitch-in-time/?play=9149a1e1-4cb2-46d7-81b2-47d35bbd332b',
        'info_dict': {
            'id': '9149a1e1-4cb2-46d7-81b2-47d35bbd332b',
            'ext': 'mp4',
            'title': 'A Stitch in Time',
            'description': r're:(?s)City Protective Services officer Kiera Cameron is transported from 2077.+',
            'thumbnail': r're:https?://.+\.jpe?g',
            'duration': 2632,
            'timestamp': 1736928000,
            'uploader': 'CWTV',
            'chapters': 'count:5',
            'series': 'Continuum',
            'season_number': 1,
            'episode_number': 1,
            'age_limit': 14,
            'upload_date': '20250115',
            'season': 'Season 1',
            'episode': 'Episode 1',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
        'info_dict': {
            'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
            'ext': 'mp4',
            'title': 'Legends of Yesterday',
            'description': r're:(?s)Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote.+',
            'duration': 2665,
            'series': 'Arrow',
            'season_number': 4,
            'season': '4',
            'episode_number': 8,
            'upload_date': '20151203',
            'timestamp': 1449122100,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'redirect to http://cwtv.com/shows/arrow/',
    }, {
        'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
        'info_dict': {
            'id': '24282b12-ead2-42f2-95ad-26770c2c6088',
            'ext': 'mp4',
            'title': 'Jeff Davis 4',
            'description': 'Jeff Davis is back to make you laugh.',
            'duration': 1263,
            'series': 'Whose Line Is It Anyway?',
            'season_number': 11,
            'episode_number': 20,
            'upload_date': '20151006',
            'timestamp': 1444107300,
            'age_limit': 14,
            'uploader': 'CWTV',
            'thumbnail': r're:https?://.+\.jpe?g',
            'chapters': 'count:4',
            'episode': 'Episode 20',
            'season': 'Season 11',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
        'only_matching': True,
    }, {
        'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e',
        'only_matching': True,
    }, {
        'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
        'only_matching': True,
    }, {
        'url': 'http://www.cwtv.com/movies/play/?guid=0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the video metadata feed and hand playback over to ThePlatform."""
        video_id = self._match_id(url)
        meta_url = f'https://images.cwtv.com/feed/app-2/video-meta/apiversion_22/device_android/guid_{video_id}'
        data = self._download_json(meta_url, video_id)
        if traverse_obj(data, 'result') != 'ok':
            error_message = traverse_obj(data, (('error_msg', 'msg'), {str}, any))
            raise ExtractorError(error_message, expected=True)

        video_data = data['video']
        title = video_data['title']
        # Fall back to the guid-based link when the feed carries no mpx_url
        base_mpx_url = (
            video_data.get('mpx_url')
            or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}')
        mpx_url = update_url_query(base_mpx_url, {'formats': 'M3U+none'})

        season = str_or_none(video_data.get('season'))
        episode = str_or_none(video_data.get('episode'))
        # The feed's episode value is stripped of as many leading characters
        # as the season value has
        if season and episode:
            episode = episode[len(season):]

        result = {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': smuggle_url(mpx_url, {'force_smil_url': True}),
        }
        result.update({
            'description': video_data.get('description_long'),
            'duration': int_or_none(video_data.get('duration_secs')),
            'series': video_data.get('series_name'),
            'season_number': int_or_none(season),
            'episode_number': int_or_none(episode),
            'timestamp': parse_iso8601(video_data.get('start_time')),
            'age_limit': parse_age_limit(video_data.get('rating')),
            'ie_key': 'ThePlatform',
            'thumbnail': video_data.get('large_thumbnail'),
        })
        return result
|
||||
|
||||
|
||||
class CWTVMovieIE(InfoExtractor):
    """Resolve a CW movie landing page to its video UUID and delegate to CWTVIE."""
    IE_NAME = 'cwtv:movie'
    _VALID_URL = r'https?://(?:www\.)?cwtv\.com/shows/(?P<id>[\w-]+)/?\?(?:[^#]+&)?viewContext=Movies'
    _TESTS = [{
        'url': 'https://www.cwtv.com/shows/the-crush/?viewContext=Movies+Swimlane',
        'info_dict': {
            'id': '0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c',
            'ext': 'mp4',
            'title': 'The Crush',
            'upload_date': '20241112',
            'description': 'md5:1549acd90dff4a8273acd7284458363e',
            'chapters': 'count:9',
            'timestamp': 1731398400,
            'age_limit': 16,
            'duration': 5337,
            'series': 'The Crush',
            'season': 'Season 1',
            'uploader': 'CWTV',
            'season_number': 1,
            'episode': 'Episode 1',
            'episode_number': 1,
            'thumbnail': r're:https?://.+\.jpe?g',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }]
    _UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'

    def _real_extract(self, url):
        """Find the movie's video UUID in the page and return a CWTVIE URL result."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Prefer the app-link meta tags; iOS first, then Android
        app_url = self._html_search_meta('al:ios:url', webpage, default=None)
        if not app_url:
            app_url = self._html_search_meta('al:android:url', webpage, default=None)

        video_id = traverse_obj(
            parse_qs(app_url),
            ('video_id', 0, {lambda x: re.fullmatch(self._UUID_RE, x)}, 0))
        if not video_id:
            # Fall back to the GUID embedded in the page's inline script
            video_id = self._search_regex([
                rf'CWTV\.Site\.curPlayingGUID\s*=\s*["\']({self._UUID_RE})',
                rf'CWTV\.Site\.viewInAppURL\s*=\s*["\']/shows/[\w-]+/watch-in-app/\?play=({self._UUID_RE})',
            ], webpage, 'video ID')

        episode_url = f'https://www.cwtv.com/shows/{display_id}/{display_id}/?play={video_id}'
        return self.url_result(episode_url, CWTVIE, video_id)
|
@ -1,201 +0,0 @@
|
||||
import itertools
|
||||
|
||||
from .cbs import CBSBaseIE
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ParamountPlusIE(CBSBaseIE):
    """Extractor for single Paramount+ show episodes and movies."""
    _VALID_URL = r'''(?x)
        (?:
            paramountplus:|
            https?://(?:www\.)?(?:
                paramountplus\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/
        )(?P<id>[\w-]+))'''

    # All tests are blocked outside US
    _TESTS = [{
        'url': 'https://www.paramountplus.com/shows/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/',
        'info_dict': {
            'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
            'ext': 'mp4',
            'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
            'description': 'md5:7ac835000645a69933df226940e3c859',
            'duration': 1426,
            'timestamp': 920264400,
            'upload_date': '19990301',
            'uploader': 'CBSI-NEW',
            'episode_number': 5,
            'thumbnail': r're:https?://.+\.jpg$',
            'season': 'Season 2',
            'chapters': 'count:3',
            'episode': 'Episode 5',
            'season_number': 2,
            'series': 'CatDog',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.paramountplus.com/shows/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/',
        'info_dict': {
            'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
            'ext': 'mp4',
            'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
            'description': 'md5:f4adcea3e8b106192022e121f1565bae',
            'duration': 2506,
            'timestamp': 1627063200,
            'upload_date': '20210723',
            'uploader': 'CBSI-NEW',
            'episode_number': 81,
            'thumbnail': r're:https?://.+\.jpg$',
            'season': 'Season 2',
            'chapters': 'count:4',
            'episode': 'Episode 81',
            'season_number': 2,
            'series': 'Tooning Out The News',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://www.paramountplus.com/movies/video/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC/',
        'info_dict': {
            'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
            'ext': 'mp4',
            'title': 'Daddy\'s Home',
            'upload_date': '20151225',
            'description': 'md5:9a6300c504d5e12000e8707f20c54745',
            'uploader': 'CBSI-NEW',
            'timestamp': 1451030400,
            'thumbnail': r're:https?://.+\.jpg$',
            'chapters': 'count:0',
            'duration': 5761,
            'series': 'Paramount+ Movies',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'skip': 'DRM',
    }, {
        'url': 'https://www.paramountplus.com/movies/video/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc/',
        'info_dict': {
            'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
            'ext': 'mp4',
            'uploader': 'CBSI-NEW',
            'description': 'md5:bc7b6fea84ba631ef77a9bda9f2ff911',
            'timestamp': 1577865600,
            'title': 'Sonic the Hedgehog',
            'upload_date': '20200101',
            'thumbnail': r're:https?://.+\.jpg$',
            'chapters': 'count:0',
            'duration': 5932,
            'series': 'Paramount+ Movies',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'skip': 'DRM',
    }, {
        'url': 'https://www.paramountplus.com/shows/the-real-world/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/the-real-world-reunion/',
        'only_matching': True,
    }, {
        'url': 'https://www.paramountplus.com/shows/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/',
        'only_matching': True,
    }, {
        'url': 'https://www.paramountplus.com/movies/video/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
        'only_matching': True,
    }, {
        'url': 'https://www.paramountplus.com/movies/paw-patrol-the-movie/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
        'only_matching': True,
    }]

    def _extract_video_info(self, content_id, mpx_acc=2198311517):
        """Build the info dict for ``content_id``.

        A first attempt requests DRM-free formats only. If that yields no
        formats, a second request that also allows DRM formats is made so that
        a DRM error can be reported instead of a generic failure.
        """
        api_url = f'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/{content_id}.json'
        items_data = self._download_json(
            api_url, content_id,
            query={
                'locale': 'en-us',
                'at': 'ABCXgPuoStiPipsK0OHVXIVh68zNys+G4f7nW9R6qH68GDOcneW6Kg89cJXGfiQCsj0=',
            },
            headers=self.geo_verification_headers())

        asset_types = {}
        for entry in items_data['itemList']:
            asset_types[entry.get('assetType')] = {
                'format': 'SMIL',
                'formats': 'M3U+none,MPEG4',  # '+none' specifies ProtectionScheme (no DRM)
            }
        # Metadata is taken from the last listed item
        item = items_data['itemList'][-1]

        metadata = {
            'title': item.get('title'),
            'series': item.get('seriesTitle'),
            'season_number': int_or_none(item.get('seasonNum')),
            'episode_number': int_or_none(item.get('episodeNum')),
            'duration': int_or_none(item.get('duration')),
            'thumbnail': url_or_none(item.get('thumbnail')),
        }

        info = {}
        error = None
        try:
            info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
        except ExtractorError as exc:
            error = exc

        if info.get('formats'):
            return info

        # Check for DRM formats to give appropriate error
        for asset_query in asset_types.values():
            asset_query['formats'] = 'MPEG-DASH,M3U,MPEG4'  # allows DRM formats

        try:
            drm_info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
        except ExtractorError:
            # Prefer surfacing the failure from the DRM-free attempt
            if error:
                raise error from None
            raise

        if drm_info['formats']:
            self.report_drm(content_id)
        elif error:
            raise error

        return info
|
||||
|
||||
|
||||
class ParamountPlusSeriesIE(InfoExtractor):
    """Playlist extractor listing every episode of a Paramount+ show."""
    _VALID_URL = r'https?://(?:www\.)?paramountplus\.com/shows/(?P<id>[a-zA-Z0-9-_]+)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://www.paramountplus.com/shows/drake-josh',
        'playlist_mincount': 50,
        'info_dict': {
            'id': 'drake-josh',
        },
    }, {
        'url': 'https://www.paramountplus.com/shows/hawaii_five_0/',
        'playlist_mincount': 240,
        'info_dict': {
            'id': 'hawaii_five_0',
        },
    }, {
        'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/',
        'playlist_mincount': 248,
        'info_dict': {
            'id': 'spongebob-squarepants',
        },
    }]

    def _entries(self, show_name):
        """Yield one URL result per episode, paging until the API reports failure."""
        page = 0
        while True:
            page_url = (
                f'https://www.paramountplus.com/shows/{show_name}'
                f'/xhr/episodes/page/{page}/size/50/xs/0/season/0')
            show_json = self._download_json(page_url, show_name)
            if not show_json.get('success'):
                return
            for episode in show_json['result']['data']:
                episode_url = 'https://www.paramountplus.com{}'.format(episode['url'])
                yield self.url_result(
                    episode_url, ie=ParamountPlusIE.ie_key(), video_id=episode['content_id'])
            page += 1

    def _real_extract(self, url):
        """Return a playlist whose id is the show slug taken from ``url``."""
        show_name = self._match_id(url)
        entries = self._entries(show_name)
        return self.playlist_result(entries, playlist_id=show_name)
|
@ -1,119 +0,0 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
qualities,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class SixPlayIE(InfoExtractor):
    """Extractor for clips on 6play.fr and the related RTL play sites."""
    IE_NAME = '6play'
    _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
        'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
        'info_dict': {
            'id': '12041051',
            'ext': 'mp4',
            'title': 'Le but qui a marqué l\'histoire du football français !',
            'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851',
        },
    }, {
        'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
        'only_matching': True,
    }, {
        'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
        'only_matching': True,
    }, {
        'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (formats, subtitles, metadata) for one clip."""
        domain, video_id = self._match_valid_url(url).groups()
        # `domain` is None for the '6play:' pseudo-URL form; default to 6play.fr
        service, consumer_name = {
            '6play.fr': ('6play', 'm6web'),
            'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
            'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
            'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'),
        }.get(domain, ('6play', 'm6web'))

        data = self._download_json(
            f'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/{service}/videos/clip_{video_id}',
            video_id, headers={
                'x-customer-name': consumer_name,
            }, query={
                'csa': 5,
                'with': 'clips',
            })

        clip_data = data['clips'][0]
        title = clip_data['title']

        urls = []
        quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
        formats = []
        subtitles = {}
        for asset in clip_data.get('assets') or []:
            asset_url = asset.get('full_physical_path')
            protocol = asset.get('protocol')
            if not asset_url or asset_url in urls:
                continue
            is_protected = protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264'
            # Protected assets are only usable when an unprotected variant can be derived
            if is_protected and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url):
                continue
            urls.append(asset_url)
            container = asset.get('video_container')
            ext = determine_ext(asset_url)
            if protocol == 'http_subtitle' or ext == 'vtt':
                subtitles.setdefault('fr', []).append({'url': asset_url})
                continue
            if container == 'm3u8' or ext == 'm3u8':
                if protocol == 'usp':
                    if parse_qs(asset_url).get('token', [None])[0]:
                        urlh = self._request_webpage(
                            asset_url, video_id, fatal=False,
                            headers=self.geo_verification_headers())
                        if not urlh:
                            continue
                        asset_url = urlh.url
                    asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/')
                    # Derive every variant from the unmodified URL: rewriting
                    # asset_url in place would remove '_sd1/' on the first pass,
                    # so '_sd2/' and '_sd1/' would never be tried. Identical
                    # candidates (URL without '_sd1/') are requested only once.
                    variant_urls = dict.fromkeys(
                        asset_url.replace('_sd1/', f'_sd{i}/') for i in range(3, 0, -1))
                    for variant_url in variant_urls:
                        m3u8_formats = self._extract_m3u8_formats(
                            variant_url, video_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False)
                        formats.extend(m3u8_formats)
                        formats.extend(self._extract_mpd_formats(
                            variant_url.replace('.m3u8', '.mpd'),
                            video_id, mpd_id='dash', fatal=False))
                        if m3u8_formats:
                            break
                else:
                    formats.extend(self._extract_m3u8_formats(
                        asset_url, video_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
            elif container == 'mp4' or ext == 'mp4':
                quality = asset.get('video_quality')
                formats.append({
                    'url': asset_url,
                    'format_id': quality,
                    'quality': quality_key(quality),
                    'ext': ext,
                })

        def get(getter):
            # Look the value up in the top-level response first, then in the clip
            for src in (data, clip_data):
                v = try_get(src, getter, str)
                if v:
                    return v

        return {
            'id': video_id,
            'title': title,
            'description': get(lambda x: x['description']),
            'duration': int_or_none(clip_data.get('duration')),
            'series': get(lambda x: x['program']['title']),
            'formats': formats,
            'subtitles': subtitles,
        }
|
@ -1,167 +0,0 @@
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
clean_podcast_url,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SpotifyBaseIE(InfoExtractor):
    """Shared access-token, API-call and episode-parsing logic for Spotify extractors."""
    _WORKING = False
    _ACCESS_TOKEN = None
    # Persisted-query hashes sent with each pathfinder operation
    _OPERATION_HASHES = {
        'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
        'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
        'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
    }
    _VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P<id>[^/?&#]+)'
    # The dot before "com" is escaped so that only the literal host matches
    _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://open\.spotify\.com/embed/[^"]+)"']

    def _real_initialize(self):
        """Fetch the access token that `_call_api` sends as a bearer token."""
        self._ACCESS_TOKEN = self._download_json(
            'https://open.spotify.com/get_access_token', None)['accessToken']

    def _call_api(self, operation, video_id, variables, **kwargs):
        """Run the persisted query ``operation`` and return the response's ``data`` member.

        ``operation`` must be a key of ``_OPERATION_HASHES``; extra keyword
        arguments are forwarded to ``_download_json``.
        """
        return self._download_json(
            'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
                'operationName': 'query' + operation,
                'variables': json.dumps(variables),
                'extensions': json.dumps({
                    'persistedQuery': {
                        'sha256Hash': self._OPERATION_HASHES[operation],
                    },
                }),
            }, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN},
            **kwargs)['data']

    def _extract_episode(self, episode, series):
        """Convert an API ``episode`` object into an info dict tagged with ``series``."""
        episode_id = episode['id']
        title = episode['name'].strip()

        formats = []
        audio_preview = episode.get('audioPreview') or {}
        audio_preview_url = audio_preview.get('url')
        if audio_preview_url:
            f = {
                'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
                'vcodec': 'none',
            }
            audio_preview_format = audio_preview.get('format')
            if audio_preview_format:
                f['format_id'] = audio_preview_format
                # Three-character codec tag, optional extra tag, then the bitrate
                mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
                if mobj:
                    f.update({
                        'abr': int(mobj.group(2)),
                        'ext': mobj.group(1).lower(),
                    })
            formats.append(f)

        for item in (try_get(episode, lambda x: x['audio']['items']) or []):
            item_url = item.get('url')
            # Only externally hosted audio items are added as formats
            if not (item_url and item.get('externallyHosted')):
                continue
            formats.append({
                'url': clean_podcast_url(item_url),
                'vcodec': 'none',
            })

        thumbnails = []
        for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
            source_url = source.get('url')
            if not source_url:
                continue
            thumbnails.append({
                'url': source_url,
                'width': int_or_none(source.get('width')),
                'height': int_or_none(source.get('height')),
            })

        return {
            'id': episode_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': strip_or_none(episode.get('description')),
            'duration': float_or_none(try_get(
                episode, lambda x: x['duration']['totalMilliseconds']), 1000),
            'release_date': unified_strdate(try_get(
                episode, lambda x: x['releaseDate']['isoString'])),
            'series': series,
        }
|
||||
|
||||
|
||||
class SpotifyIE(SpotifyBaseIE):
    """Extractor for a single Spotify podcast episode."""
    IE_NAME = 'spotify'
    IE_DESC = 'Spotify episodes'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
    _TESTS = [{
        'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
        'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
        'info_dict': {
            'id': '4Z7GAJ50bgctf6uclHlWKo',
            'ext': 'mp3',
            'title': 'From the archive: Why time management is ruining our lives',
            'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
            'duration': 2083.605,
            'release_date': '20201217',
            'series': "The Guardian's Audio Long Reads",
        },
    }, {
        'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the 'Episode' operation and convert its result to an info dict."""
        episode_id = self._match_id(url)
        variables = {'uri': 'spotify:episode:' + episode_id}
        response = self._call_api('Episode', episode_id, variables)
        episode = response['episode']
        # The series name comes from the episode's parent podcast, when present
        series_name = try_get(episode, lambda x: x['podcast']['name'])
        return self._extract_episode(episode, series_name)
|
||||
|
||||
|
||||
class SpotifyShowIE(SpotifyBaseIE):
    """Playlist extractor for all episodes of a Spotify show."""
    IE_NAME = 'spotify:show'
    IE_DESC = 'Spotify shows'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
    _TEST = {
        'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
        'info_dict': {
            'id': '4PM9Ke6l66IRNpottHKV9M',
            'title': 'The Story from the Guardian',
            'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
        },
        'playlist_mincount': 36,
    }
    # Page size used for the request limit, the offset and the paged list
    _PER_PAGE = 100

    def _fetch_page(self, show_id, page=0):
        """Return the ``podcast`` object for the zero-based ``page`` of episodes."""
        return self._call_api('ShowEpisodes', show_id, {
            # Must equal the offset stride below, otherwise pages would
            # overlap or leave gaps if _PER_PAGE is ever changed
            'limit': self._PER_PAGE,
            'offset': page * self._PER_PAGE,
            'uri': f'spotify:show:{show_id}',
        }, note=f'Downloading page {page + 1} JSON metadata')['podcast']

    def _real_extract(self, url):
        """Return a lazily paged playlist of the show's episodes."""
        show_id = self._match_id(url)
        # The first page also supplies the playlist title and description
        first_page = self._fetch_page(show_id)

        def _entries(page):
            # Reuse the already downloaded first page instead of refetching it
            podcast = self._fetch_page(show_id, page) if page else first_page
            yield from map(
                functools.partial(self._extract_episode, series=podcast.get('name')),
                traverse_obj(podcast, ('episodes', 'items', ..., 'episode')))

        return self.playlist_result(
            OnDemandPagedList(_entries, self._PER_PAGE),
            show_id, first_page.get('name'), first_page.get('description'))
|
@ -1,52 +0,0 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
class XanimuIE(InfoExtractor):
    """Extractor for xanimu.com video pages."""
    _VALID_URL = r'https?://(?:www\.)?xanimu\.com/(?P<id>[^/]+)/?'
    _TESTS = [{
        'url': 'https://xanimu.com/51944-the-princess-the-frog-hentai/',
        'md5': '899b88091d753d92dad4cb63bbf357a7',
        'info_dict': {
            'id': '51944-the-princess-the-frog-hentai',
            'ext': 'mp4',
            'title': 'The Princess + The Frog Hentai',
            'thumbnail': 'https://xanimu.com/storage/2020/09/the-princess-and-the-frog-hentai.jpg',
            'description': r're:^Enjoy The Princess \+ The Frog Hentai',
            'duration': 207.0,
            'age_limit': 18,
        },
    }, {
        'url': 'https://xanimu.com/huge-expansion/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape format URLs and metadata from the video page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        formats = []
        # Each quality is exposed as a JavaScript variable holding a quoted URL
        for format_id in ('videoHigh', 'videoLow'):
            format_url = self._search_json(
                rf'var\s+{re.escape(format_id)}\s*=', webpage, format_id,
                video_id, default=None, contains_pattern=r'[\'"]([^\'"]+)[\'"]')
            if not format_url:
                continue
            quality = -2 if format_id.endswith('Low') else None
            formats.append({
                'url': format_url,
                'format_id': format_id,
                'quality': quality,
            })

        # Prefer the embedded "headline" value, falling back to the HTML <title>
        title = self._search_regex(
            r'[\'"]headline[\'"]:\s*[\'"]([^"]+)[\'"]', webpage,
            'title', default=None) or self._html_extract_title(webpage)
        thumbnail = self._html_search_meta('thumbnailUrl', webpage, default=None)
        description = self._html_search_meta('description', webpage, default=None)
        raw_duration = self._search_regex(
            r'duration:\s*[\'"]([^\'"]+?)[\'"]', webpage, 'duration', fatal=False)

        return {
            'id': video_id,
            'formats': formats,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'duration': int_or_none(raw_duration),
            'age_limit': 18,
        }
|
Loading…
Reference in New Issue