[cleanup] Remove broken extractors (#14305)

Closes #1466, Closes #2005, Closes #4897, Closes #5118, Closes #8489, Closes #13072
Authored by: bashonly
pull/12448/merge
bashonly 7 days ago committed by GitHub
parent 17bfaa53ed
commit 65e90aea29
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -424,7 +424,6 @@ from .cpac import (
CPACPlaylistIE,
)
from .cracked import CrackedIE
from .crackle import CrackleIE
from .craftsy import CraftsyIE
from .crooksandliars import CrooksAndLiarsIE
from .crowdbunker import (
@ -444,10 +443,6 @@ from .curiositystream import (
CuriosityStreamIE,
CuriosityStreamSeriesIE,
)
from .cwtv import (
CWTVIE,
CWTVMovieIE,
)
from .cybrary import (
CybraryCourseIE,
CybraryIE,
@ -1467,10 +1462,6 @@ from .panopto import (
PanoptoListIE,
PanoptoPlaylistIE,
)
from .paramountplus import (
ParamountPlusIE,
ParamountPlusSeriesIE,
)
from .parler import ParlerIE
from .parlview import ParlviewIE
from .parti import (
@ -1849,7 +1840,6 @@ from .simplecast import (
SimplecastPodcastIE,
)
from .sina import SinaIE
from .sixplay import SixPlayIE
from .skeb import SkebIE
from .sky import (
SkyNewsIE,
@ -1930,10 +1920,6 @@ from .spiegel import SpiegelIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .spotify import (
SpotifyIE,
SpotifyShowIE,
)
from .spreaker import (
SpreakerIE,
SpreakerShowIE,
@ -2477,7 +2463,6 @@ from .wykop import (
WykopPostCommentIE,
WykopPostIE,
)
from .xanimu import XanimuIE
from .xboxclips import XboxClipsIE
from .xhamster import (
XHamsterEmbedIE,

@ -5,8 +5,6 @@ import zlib
from .anvato import AnvatoIE
from .common import InfoExtractor
from .paramountplus import ParamountPlusIE
from ..networking import HEADRequest
from ..utils import (
ExtractorError,
UserNotLive,
@ -132,13 +130,7 @@ class CBSNewsEmbedIE(CBSNewsBaseIE):
video_id = item['mpxRefId']
video_url = self._get_video_url(item)
if not video_url:
# Old embeds redirect user to ParamountPlus but most links are 404
pplus_url = f'https://www.paramountplus.com/shows/video/{video_id}'
try:
self._request_webpage(HEADRequest(pplus_url), video_id)
return self.url_result(pplus_url, ParamountPlusIE)
except ExtractorError:
self.raise_no_formats('This video is no longer available', True, video_id)
raise ExtractorError('This video is no longer available', expected=True)
return self._extract_video(item, video_url, video_id)

@ -1,243 +0,0 @@
import hashlib
import hmac
import re
import time
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
determine_ext,
float_or_none,
int_or_none,
orderedSet,
parse_age_limit,
parse_duration,
url_or_none,
)
class CrackleIE(InfoExtractor):
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TESTS = [{
# Crackle is available in the United States and territories
'url': 'https://www.crackle.com/thanksgiving/2510064',
'info_dict': {
'id': '2510064',
'ext': 'mp4',
'title': 'Touch Football',
'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df',
'duration': 1398,
'view_count': int,
'average_rating': 0,
'age_limit': 17,
'genre': 'Comedy',
'creator': 'Daniel Powell',
'artist': 'Chris Elliott, Amy Sedaris',
'release_year': 2016,
'series': 'Thanksgiving',
'episode': 'Touch Football',
'season_number': 1,
'episode_number': 1,
},
'params': {
# m3u8 download
'skip_download': True,
},
'expected_warnings': [
'Trying with a list of known countries',
],
}, {
'url': 'https://www.sonycrackle.com/thanksgiving/2510064',
'only_matching': True,
}]
_MEDIA_FILE_SLOTS = {
'360p.mp4': {
'width': 640,
'height': 360,
},
'480p.mp4': {
'width': 768,
'height': 432,
},
'480p_1mbps.mp4': {
'width': 852,
'height': 480,
},
}
def _download_json(self, url, *args, **kwargs):
# Authorization generation algorithm is reverse engineered from:
# https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
headers = {
'Accept': 'application/json',
'Authorization': '|'.join([h, timestamp, '117', '1']),
}
return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs)
def _real_extract(self, url):
video_id = self._match_id(url)
geo_bypass_country = self.get_param('geo_bypass_country', None)
countries = orderedSet((geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
num_countries, num = len(countries) - 1, 0
media = {}
for num, country in enumerate(countries):
if num == 1: # start hard-coded list
self.report_warning('%s. Trying with a list of known countries' % (
f'Unable to obtain video formats from {geo_bypass_country} API' if geo_bypass_country
else 'No country code was given using --geo-bypass-country'))
elif num == num_countries: # end of list
geo_info = self._download_json(
'https://web-api-us.crackle.com/Service.svc/geo/country',
video_id, fatal=False, note='Downloading geo-location information from crackle API',
errnote='Unable to fetch geo-location information from crackle') or {}
country = geo_info.get('CountryCode')
if country is None:
continue
self.to_screen(f'{self.IE_NAME} identified country as {country}')
if country in countries:
self.to_screen(f'Downloading from {country} API was already attempted. Skipping...')
continue
if country is None:
continue
try:
media = self._download_json(
f'https://web-api-us.crackle.com/Service.svc/details/media/{video_id}/{country}?disableProtocols=true',
video_id, note=f'Downloading media JSON from {country} API',
errnote='Unable to download media JSON')
except ExtractorError as e:
# 401 means geo restriction, trying next country
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
continue
raise
status = media.get('status')
if status.get('messageCode') != '0':
raise ExtractorError(
'{} said: {} {} - {}'.format(
self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
expected=True)
# Found video formats
if isinstance(media.get('MediaURLs'), list):
break
ignore_no_formats = self.get_param('ignore_no_formats_error')
if not media or (not media.get('MediaURLs') and not ignore_no_formats):
raise ExtractorError(
'Unable to access the crackle API. Try passing your country code '
'to --geo-bypass-country. If it still does not work and the '
'video is available in your country')
title = media['Title']
formats, subtitles = [], {}
has_drm = False
for e in media.get('MediaURLs') or []:
if e.get('UseDRM'):
has_drm = True
format_url = url_or_none(e.get('DRMPath'))
else:
format_url = url_or_none(e.get('Path'))
if not format_url:
continue
ext = determine_ext(format_url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
elif ext == 'mpd':
fmts, subs = self._extract_mpd_formats_and_subtitles(
format_url, video_id, mpd_id='dash', fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
elif format_url.endswith('.ism/Manifest'):
fmts, subs = self._extract_ism_formats_and_subtitles(
format_url, video_id, ism_id='mss', fatal=False)
formats.extend(fmts)
subtitles = self._merge_subtitles(subtitles, subs)
else:
mfs_path = e.get('Type')
mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
if not mfs_info:
continue
formats.append({
'url': format_url,
'format_id': 'http-' + mfs_path.split('.')[0],
'width': mfs_info['width'],
'height': mfs_info['height'],
})
if not formats and has_drm:
self.report_drm(video_id)
description = media.get('Description')
duration = int_or_none(media.get(
'DurationInSeconds')) or parse_duration(media.get('Duration'))
view_count = int_or_none(media.get('CountViews'))
average_rating = float_or_none(media.get('UserRating'))
age_limit = parse_age_limit(media.get('Rating'))
genre = media.get('Genre')
release_year = int_or_none(media.get('ReleaseYear'))
creator = media.get('Directors')
artist = media.get('Cast')
if media.get('MediaTypeDisplayValue') == 'Full Episode':
series = media.get('ShowName')
episode = title
season_number = int_or_none(media.get('Season'))
episode_number = int_or_none(media.get('Episode'))
else:
series = episode = season_number = episode_number = None
cc_files = media.get('ClosedCaptionFiles')
if isinstance(cc_files, list):
for cc_file in cc_files:
if not isinstance(cc_file, dict):
continue
cc_url = url_or_none(cc_file.get('Path'))
if not cc_url:
continue
lang = cc_file.get('Locale') or 'en'
subtitles.setdefault(lang, []).append({'url': cc_url})
thumbnails = []
images = media.get('Images')
if isinstance(images, list):
for image_key, image_url in images.items():
mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
if not mobj:
continue
thumbnails.append({
'url': image_url,
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
})
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'average_rating': average_rating,
'age_limit': age_limit,
'genre': genre,
'creator': creator,
'artist': artist,
'release_year': release_year,
'series': series,
'episode': episode,
'season_number': season_number,
'episode_number': episode_number,
'thumbnails': thumbnails,
'subtitles': subtitles,
'formats': formats,
}

@ -1,180 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
parse_age_limit,
parse_iso8601,
parse_qs,
smuggle_url,
str_or_none,
update_url_query,
)
from ..utils.traversal import traverse_obj
class CWTVIE(InfoExtractor):
IE_NAME = 'cwtv'
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch|guid)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
_TESTS = [{
'url': 'https://www.cwtv.com/shows/continuum/a-stitch-in-time/?play=9149a1e1-4cb2-46d7-81b2-47d35bbd332b',
'info_dict': {
'id': '9149a1e1-4cb2-46d7-81b2-47d35bbd332b',
'ext': 'mp4',
'title': 'A Stitch in Time',
'description': r're:(?s)City Protective Services officer Kiera Cameron is transported from 2077.+',
'thumbnail': r're:https?://.+\.jpe?g',
'duration': 2632,
'timestamp': 1736928000,
'uploader': 'CWTV',
'chapters': 'count:5',
'series': 'Continuum',
'season_number': 1,
'episode_number': 1,
'age_limit': 14,
'upload_date': '20250115',
'season': 'Season 1',
'episode': 'Episode 1',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
'info_dict': {
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
'ext': 'mp4',
'title': 'Legends of Yesterday',
'description': r're:(?s)Oliver and Barry Allen take Kendra Saunders and Carter Hall to a remote.+',
'duration': 2665,
'series': 'Arrow',
'season_number': 4,
'season': '4',
'episode_number': 8,
'upload_date': '20151203',
'timestamp': 1449122100,
},
'params': {
# m3u8 download
'skip_download': True,
},
'skip': 'redirect to http://cwtv.com/shows/arrow/',
}, {
'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
'info_dict': {
'id': '24282b12-ead2-42f2-95ad-26770c2c6088',
'ext': 'mp4',
'title': 'Jeff Davis 4',
'description': 'Jeff Davis is back to make you laugh.',
'duration': 1263,
'series': 'Whose Line Is It Anyway?',
'season_number': 11,
'episode_number': 20,
'upload_date': '20151006',
'timestamp': 1444107300,
'age_limit': 14,
'uploader': 'CWTV',
'thumbnail': r're:https?://.+\.jpe?g',
'chapters': 'count:4',
'episode': 'Episode 20',
'season': 'Season 11',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
'only_matching': True,
}, {
'url': 'http://cwtvpr.com/the-cw/video?watch=9eee3f60-ef4e-440b-b3b2-49428ac9c54e',
'only_matching': True,
}, {
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?watch=6b15e985-9345-4f60-baf8-56e96be57c63',
'only_matching': True,
}, {
'url': 'http://www.cwtv.com/movies/play/?guid=0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
f'https://images.cwtv.com/feed/app-2/video-meta/apiversion_22/device_android/guid_{video_id}', video_id)
if traverse_obj(data, 'result') != 'ok':
raise ExtractorError(traverse_obj(data, (('error_msg', 'msg'), {str}, any)), expected=True)
video_data = data['video']
title = video_data['title']
mpx_url = update_url_query(
video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
{'formats': 'M3U+none'})
season = str_or_none(video_data.get('season'))
episode = str_or_none(video_data.get('episode'))
if episode and season:
episode = episode[len(season):]
return {
'_type': 'url_transparent',
'id': video_id,
'title': title,
'url': smuggle_url(mpx_url, {'force_smil_url': True}),
'description': video_data.get('description_long'),
'duration': int_or_none(video_data.get('duration_secs')),
'series': video_data.get('series_name'),
'season_number': int_or_none(season),
'episode_number': int_or_none(episode),
'timestamp': parse_iso8601(video_data.get('start_time')),
'age_limit': parse_age_limit(video_data.get('rating')),
'ie_key': 'ThePlatform',
'thumbnail': video_data.get('large_thumbnail'),
}
class CWTVMovieIE(InfoExtractor):
IE_NAME = 'cwtv:movie'
_VALID_URL = r'https?://(?:www\.)?cwtv\.com/shows/(?P<id>[\w-]+)/?\?(?:[^#]+&)?viewContext=Movies'
_TESTS = [{
'url': 'https://www.cwtv.com/shows/the-crush/?viewContext=Movies+Swimlane',
'info_dict': {
'id': '0a8e8b5b-1356-41d5-9a6a-4eda1a6feb6c',
'ext': 'mp4',
'title': 'The Crush',
'upload_date': '20241112',
'description': 'md5:1549acd90dff4a8273acd7284458363e',
'chapters': 'count:9',
'timestamp': 1731398400,
'age_limit': 16,
'duration': 5337,
'series': 'The Crush',
'season': 'Season 1',
'uploader': 'CWTV',
'season_number': 1,
'episode': 'Episode 1',
'episode_number': 1,
'thumbnail': r're:https?://.+\.jpe?g',
},
'params': {
# m3u8 download
'skip_download': True,
},
}]
_UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
app_url = (
self._html_search_meta('al:ios:url', webpage, default=None)
or self._html_search_meta('al:android:url', webpage, default=None))
video_id = (
traverse_obj(parse_qs(app_url), ('video_id', 0, {lambda x: re.fullmatch(self._UUID_RE, x)}, 0))
or self._search_regex([
rf'CWTV\.Site\.curPlayingGUID\s*=\s*["\']({self._UUID_RE})',
rf'CWTV\.Site\.viewInAppURL\s*=\s*["\']/shows/[\w-]+/watch-in-app/\?play=({self._UUID_RE})',
], webpage, 'video ID'))
return self.url_result(
f'https://www.cwtv.com/shows/{display_id}/{display_id}/?play={video_id}', CWTVIE, video_id)

@ -1,201 +0,0 @@
import itertools
from .cbs import CBSBaseIE
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
url_or_none,
)
class ParamountPlusIE(CBSBaseIE):
_VALID_URL = r'''(?x)
(?:
paramountplus:|
https?://(?:www\.)?(?:
paramountplus\.com/(?:shows|movies)/(?:video|[^/]+/video|[^/]+)/
)(?P<id>[\w-]+))'''
# All tests are blocked outside US
_TESTS = [{
'url': 'https://www.paramountplus.com/shows/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/',
'info_dict': {
'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
'ext': 'mp4',
'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
'description': 'md5:7ac835000645a69933df226940e3c859',
'duration': 1426,
'timestamp': 920264400,
'upload_date': '19990301',
'uploader': 'CBSI-NEW',
'episode_number': 5,
'thumbnail': r're:https?://.+\.jpg$',
'season': 'Season 2',
'chapters': 'count:3',
'episode': 'Episode 5',
'season_number': 2,
'series': 'CatDog',
},
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://www.paramountplus.com/shows/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/',
'info_dict': {
'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
'ext': 'mp4',
'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
'description': 'md5:f4adcea3e8b106192022e121f1565bae',
'duration': 2506,
'timestamp': 1627063200,
'upload_date': '20210723',
'uploader': 'CBSI-NEW',
'episode_number': 81,
'thumbnail': r're:https?://.+\.jpg$',
'season': 'Season 2',
'chapters': 'count:4',
'episode': 'Episode 81',
'season_number': 2,
'series': 'Tooning Out The News',
},
'params': {
'skip_download': 'm3u8',
},
}, {
'url': 'https://www.paramountplus.com/movies/video/vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC/',
'info_dict': {
'id': 'vM2vm0kE6vsS2U41VhMRKTOVHyQAr6pC',
'ext': 'mp4',
'title': 'Daddy\'s Home',
'upload_date': '20151225',
'description': 'md5:9a6300c504d5e12000e8707f20c54745',
'uploader': 'CBSI-NEW',
'timestamp': 1451030400,
'thumbnail': r're:https?://.+\.jpg$',
'chapters': 'count:0',
'duration': 5761,
'series': 'Paramount+ Movies',
},
'params': {
'skip_download': 'm3u8',
},
'skip': 'DRM',
}, {
'url': 'https://www.paramountplus.com/movies/video/5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc/',
'info_dict': {
'id': '5EKDXPOzdVf9voUqW6oRuocyAEeJGbEc',
'ext': 'mp4',
'uploader': 'CBSI-NEW',
'description': 'md5:bc7b6fea84ba631ef77a9bda9f2ff911',
'timestamp': 1577865600,
'title': 'Sonic the Hedgehog',
'upload_date': '20200101',
'thumbnail': r're:https?://.+\.jpg$',
'chapters': 'count:0',
'duration': 5932,
'series': 'Paramount+ Movies',
},
'params': {
'skip_download': 'm3u8',
},
'skip': 'DRM',
}, {
'url': 'https://www.paramountplus.com/shows/the-real-world/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/the-real-world-reunion/',
'only_matching': True,
}, {
'url': 'https://www.paramountplus.com/shows/video/mOVeHeL9ub9yWdyzSZFYz8Uj4ZBkVzQg/',
'only_matching': True,
}, {
'url': 'https://www.paramountplus.com/movies/video/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
'only_matching': True,
}, {
'url': 'https://www.paramountplus.com/movies/paw-patrol-the-movie/W0VyStQqUnqKzJkrpSAIARuCc9YuYGNy/',
'only_matching': True,
}]
def _extract_video_info(self, content_id, mpx_acc=2198311517):
items_data = self._download_json(
f'https://www.paramountplus.com/apps-api/v2.0/androidtv/video/cid/{content_id}.json',
content_id, query={
'locale': 'en-us',
'at': 'ABCXgPuoStiPipsK0OHVXIVh68zNys+G4f7nW9R6qH68GDOcneW6Kg89cJXGfiQCsj0=',
}, headers=self.geo_verification_headers())
asset_types = {
item.get('assetType'): {
'format': 'SMIL',
'formats': 'M3U+none,MPEG4', # '+none' specifies ProtectionScheme (no DRM)
} for item in items_data['itemList']
}
item = items_data['itemList'][-1]
info, error = {}, None
metadata = {
'title': item.get('title'),
'series': item.get('seriesTitle'),
'season_number': int_or_none(item.get('seasonNum')),
'episode_number': int_or_none(item.get('episodeNum')),
'duration': int_or_none(item.get('duration')),
'thumbnail': url_or_none(item.get('thumbnail')),
}
try:
info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
except ExtractorError as e:
error = e
# Check for DRM formats to give appropriate error
if not info.get('formats'):
for query in asset_types.values():
query['formats'] = 'MPEG-DASH,M3U,MPEG4' # allows DRM formats
try:
drm_info = self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info=metadata)
except ExtractorError:
if error:
raise error from None
raise
if drm_info['formats']:
self.report_drm(content_id)
elif error:
raise error
return info
class ParamountPlusSeriesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountplus\.com/shows/(?P<id>[a-zA-Z0-9-_]+)/?(?:[#?]|$)'
_TESTS = [{
'url': 'https://www.paramountplus.com/shows/drake-josh',
'playlist_mincount': 50,
'info_dict': {
'id': 'drake-josh',
},
}, {
'url': 'https://www.paramountplus.com/shows/hawaii_five_0/',
'playlist_mincount': 240,
'info_dict': {
'id': 'hawaii_five_0',
},
}, {
'url': 'https://www.paramountplus.com/shows/spongebob-squarepants/',
'playlist_mincount': 248,
'info_dict': {
'id': 'spongebob-squarepants',
},
}]
def _entries(self, show_name):
for page in itertools.count():
show_json = self._download_json(
f'https://www.paramountplus.com/shows/{show_name}/xhr/episodes/page/{page}/size/50/xs/0/season/0', show_name)
if not show_json.get('success'):
return
for episode in show_json['result']['data']:
yield self.url_result(
'https://www.paramountplus.com{}'.format(episode['url']),
ie=ParamountPlusIE.ie_key(), video_id=episode['content_id'])
def _real_extract(self, url):
show_name = self._match_id(url)
return self.playlist_result(self._entries(show_name), playlist_id=show_name)

@ -1,119 +0,0 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
int_or_none,
parse_qs,
qualities,
try_get,
)
class SixPlayIE(InfoExtractor):
IE_NAME = '6play'
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
'info_dict': {
'id': '12041051',
'ext': 'mp4',
'title': 'Le but qui a marqué l\'histoire du football français !',
'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851',
},
}, {
'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
'only_matching': True,
}, {
'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
'only_matching': True,
}, {
'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787',
'only_matching': True,
}]
def _real_extract(self, url):
domain, video_id = self._match_valid_url(url).groups()
service, consumer_name = {
'6play.fr': ('6play', 'm6web'),
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'),
}.get(domain, ('6play', 'm6web'))
data = self._download_json(
f'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/{service}/videos/clip_{video_id}',
video_id, headers={
'x-customer-name': consumer_name,
}, query={
'csa': 5,
'with': 'clips',
})
clip_data = data['clips'][0]
title = clip_data['title']
urls = []
quality_key = qualities(['lq', 'sd', 'hq', 'hd'])
formats = []
subtitles = {}
assets = clip_data.get('assets') or []
for asset in assets:
asset_url = asset.get('full_physical_path')
protocol = asset.get('protocol')
if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls:
continue
urls.append(asset_url)
container = asset.get('video_container')
ext = determine_ext(asset_url)
if protocol == 'http_subtitle' or ext == 'vtt':
subtitles.setdefault('fr', []).append({'url': asset_url})
continue
if container == 'm3u8' or ext == 'm3u8':
if protocol == 'usp':
if parse_qs(asset_url).get('token', [None])[0]:
urlh = self._request_webpage(
asset_url, video_id, fatal=False,
headers=self.geo_verification_headers())
if not urlh:
continue
asset_url = urlh.url
asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/')
for i in range(3, 0, -1):
asset_url = asset_url.replace('_sd1/', f'_sd{i}/')
m3u8_formats = self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
formats.extend(self._extract_mpd_formats(
asset_url.replace('.m3u8', '.mpd'),
video_id, mpd_id='dash', fatal=False))
if m3u8_formats:
break
else:
formats.extend(self._extract_m3u8_formats(
asset_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
elif container == 'mp4' or ext == 'mp4':
quality = asset.get('video_quality')
formats.append({
'url': asset_url,
'format_id': quality,
'quality': quality_key(quality),
'ext': ext,
})
def get(getter):
for src in (data, clip_data):
v = try_get(src, getter, str)
if v:
return v
return {
'id': video_id,
'title': title,
'description': get(lambda x: x['description']),
'duration': int_or_none(clip_data.get('duration')),
'series': get(lambda x: x['program']['title']),
'formats': formats,
'subtitles': subtitles,
}

@ -1,167 +0,0 @@
import functools
import json
import re
from .common import InfoExtractor
from ..utils import (
OnDemandPagedList,
clean_podcast_url,
float_or_none,
int_or_none,
strip_or_none,
traverse_obj,
try_get,
unified_strdate,
)
class SpotifyBaseIE(InfoExtractor):
_WORKING = False
_ACCESS_TOKEN = None
_OPERATION_HASHES = {
'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
}
_VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P<id>[^/?&#]+)'
_EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://open\.spotify.com/embed/[^"]+)"']
def _real_initialize(self):
self._ACCESS_TOKEN = self._download_json(
'https://open.spotify.com/get_access_token', None)['accessToken']
def _call_api(self, operation, video_id, variables, **kwargs):
return self._download_json(
'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
'operationName': 'query' + operation,
'variables': json.dumps(variables),
'extensions': json.dumps({
'persistedQuery': {
'sha256Hash': self._OPERATION_HASHES[operation],
},
}),
}, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN},
**kwargs)['data']
def _extract_episode(self, episode, series):
episode_id = episode['id']
title = episode['name'].strip()
formats = []
audio_preview = episode.get('audioPreview') or {}
audio_preview_url = audio_preview.get('url')
if audio_preview_url:
f = {
'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
'vcodec': 'none',
}
audio_preview_format = audio_preview.get('format')
if audio_preview_format:
f['format_id'] = audio_preview_format
mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
if mobj:
f.update({
'abr': int(mobj.group(2)),
'ext': mobj.group(1).lower(),
})
formats.append(f)
for item in (try_get(episode, lambda x: x['audio']['items']) or []):
item_url = item.get('url')
if not (item_url and item.get('externallyHosted')):
continue
formats.append({
'url': clean_podcast_url(item_url),
'vcodec': 'none',
})
thumbnails = []
for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
source_url = source.get('url')
if not source_url:
continue
thumbnails.append({
'url': source_url,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
})
return {
'id': episode_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': strip_or_none(episode.get('description')),
'duration': float_or_none(try_get(
episode, lambda x: x['duration']['totalMilliseconds']), 1000),
'release_date': unified_strdate(try_get(
episode, lambda x: x['releaseDate']['isoString'])),
'series': series,
}
class SpotifyIE(SpotifyBaseIE):
IE_NAME = 'spotify'
IE_DESC = 'Spotify episodes'
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
_TESTS = [{
'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
'info_dict': {
'id': '4Z7GAJ50bgctf6uclHlWKo',
'ext': 'mp3',
'title': 'From the archive: Why time management is ruining our lives',
'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
'duration': 2083.605,
'release_date': '20201217',
'series': "The Guardian's Audio Long Reads",
},
}, {
'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA',
'only_matching': True,
}]
def _real_extract(self, url):
episode_id = self._match_id(url)
episode = self._call_api('Episode', episode_id, {
'uri': 'spotify:episode:' + episode_id,
})['episode']
return self._extract_episode(
episode, try_get(episode, lambda x: x['podcast']['name']))
class SpotifyShowIE(SpotifyBaseIE):
IE_NAME = 'spotify:show'
IE_DESC = 'Spotify shows'
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
_TEST = {
'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
'info_dict': {
'id': '4PM9Ke6l66IRNpottHKV9M',
'title': 'The Story from the Guardian',
'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
},
'playlist_mincount': 36,
}
_PER_PAGE = 100
def _fetch_page(self, show_id, page=0):
return self._call_api('ShowEpisodes', show_id, {
'limit': 100,
'offset': page * self._PER_PAGE,
'uri': f'spotify:show:{show_id}',
}, note=f'Downloading page {page + 1} JSON metadata')['podcast']
def _real_extract(self, url):
show_id = self._match_id(url)
first_page = self._fetch_page(show_id)
def _entries(page):
podcast = self._fetch_page(show_id, page) if page else first_page
yield from map(
functools.partial(self._extract_episode, series=podcast.get('name')),
traverse_obj(podcast, ('episodes', 'items', ..., 'episode')))
return self.playlist_result(
OnDemandPagedList(_entries, self._PER_PAGE),
show_id, first_page.get('name'), first_page.get('description'))

@ -30,13 +30,13 @@ class KnownDRMIE(UnsupportedInfoExtractor):
r'play\.hbomax\.com',
r'channel(?:4|5)\.com',
r'peacocktv\.com',
r'(?:[\w\.]+\.)?disneyplus\.com',
r'open\.spotify\.com/(?:track|playlist|album|artist)',
r'(?:[\w.]+\.)?disneyplus\.com',
r'open\.spotify\.com',
r'tvnz\.co\.nz',
r'oneplus\.ch',
r'artstation\.com/learning/courses',
r'philo\.com',
r'(?:[\w\.]+\.)?mech-plus\.com',
r'(?:[\w.]+\.)?mech-plus\.com',
r'aha\.video',
r'mubi\.com',
r'vootkids\.com',
@ -57,6 +57,14 @@ class KnownDRMIE(UnsupportedInfoExtractor):
r'ctv\.ca',
r'noovo\.ca',
r'tsn\.ca',
r'paramountplus\.com',
r'(?:m\.)?(?:sony)?crackle\.com',
r'cw(?:tv(?:pr)?|seed)\.com',
r'6play\.fr',
r'rtlplay\.be',
r'play\.rtl\.hr',
r'rtlmost\.hu',
r'plus\.rtl\.de(?!/podcast/)',
)
_TESTS = [{
@ -78,10 +86,7 @@ class KnownDRMIE(UnsupportedInfoExtractor):
'url': r'https://www.disneyplus.com',
'only_matching': True,
}, {
'url': 'https://open.spotify.com/artist/',
'only_matching': True,
}, {
'url': 'https://open.spotify.com/track/',
'url': 'https://open.spotify.com',
'only_matching': True,
}, {
# https://github.com/yt-dlp/yt-dlp/issues/4122
@ -184,6 +189,39 @@ class KnownDRMIE(UnsupportedInfoExtractor):
}, {
'url': 'https://www.tsn.ca/video/relaxed-oilers-look-to-put-emotional-game-2-loss-in-the-rearview%7E3148747',
'only_matching': True,
}, {
'url': 'https://www.paramountplus.com',
'only_matching': True,
}, {
'url': 'https://www.crackle.com',
'only_matching': True,
}, {
'url': 'https://m.sonycrackle.com',
'only_matching': True,
}, {
'url': 'https://www.cwtv.com',
'only_matching': True,
}, {
'url': 'https://www.cwseed.com',
'only_matching': True,
}, {
'url': 'https://cwtvpr.com',
'only_matching': True,
}, {
'url': 'https://www.6play.fr',
'only_matching': True,
}, {
'url': 'https://www.rtlplay.be',
'only_matching': True,
}, {
'url': 'https://play.rtl.hr',
'only_matching': True,
}, {
'url': 'https://www.rtlmost.hu',
'only_matching': True,
}, {
'url': 'https://plus.rtl.de/video-tv/',
'only_matching': True,
}]
def _real_extract(self, url):
@ -222,6 +260,7 @@ class KnownPiracyIE(UnsupportedInfoExtractor):
r'91porn\.com',
r'einthusan\.(?:tv|com|ca)',
r'yourupload\.com',
r'xanimu\.com',
)
_TESTS = [{

@ -1,52 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import int_or_none
class XanimuIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?xanimu\.com/(?P<id>[^/]+)/?'
_TESTS = [{
'url': 'https://xanimu.com/51944-the-princess-the-frog-hentai/',
'md5': '899b88091d753d92dad4cb63bbf357a7',
'info_dict': {
'id': '51944-the-princess-the-frog-hentai',
'ext': 'mp4',
'title': 'The Princess + The Frog Hentai',
'thumbnail': 'https://xanimu.com/storage/2020/09/the-princess-and-the-frog-hentai.jpg',
'description': r're:^Enjoy The Princess \+ The Frog Hentai',
'duration': 207.0,
'age_limit': 18,
},
}, {
'url': 'https://xanimu.com/huge-expansion/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
formats = []
for format_id in ['videoHigh', 'videoLow']:
format_url = self._search_json(
rf'var\s+{re.escape(format_id)}\s*=', webpage, format_id,
video_id, default=None, contains_pattern=r'[\'"]([^\'"]+)[\'"]')
if format_url:
formats.append({
'url': format_url,
'format_id': format_id,
'quality': -2 if format_id.endswith('Low') else None,
})
return {
'id': video_id,
'formats': formats,
'title': self._search_regex(r'[\'"]headline[\'"]:\s*[\'"]([^"]+)[\'"]', webpage,
'title', default=None) or self._html_extract_title(webpage),
'thumbnail': self._html_search_meta('thumbnailUrl', webpage, default=None),
'description': self._html_search_meta('description', webpage, default=None),
'duration': int_or_none(self._search_regex(r'duration:\s*[\'"]([^\'"]+?)[\'"]',
webpage, 'duration', fatal=False)),
'age_limit': 18,
}
Loading…
Cancel
Save