|
|
|
@ -14,16 +14,18 @@ from ..utils import (
|
|
|
|
|
js_to_json,
|
|
|
|
|
mimetype2ext,
|
|
|
|
|
orderedSet,
|
|
|
|
|
parse_age_limit,
|
|
|
|
|
parse_iso8601,
|
|
|
|
|
replace_extension,
|
|
|
|
|
smuggle_url,
|
|
|
|
|
strip_or_none,
|
|
|
|
|
traverse_obj,
|
|
|
|
|
try_get,
|
|
|
|
|
unified_timestamp,
|
|
|
|
|
update_url,
|
|
|
|
|
url_basename,
|
|
|
|
|
url_or_none,
|
|
|
|
|
)
|
|
|
|
|
from ..utils.traversal import require, traverse_obj, trim_str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CBCIE(InfoExtractor):
|
|
|
|
@ -516,9 +518,43 @@ class CBCPlayerPlaylistIE(InfoExtractor):
|
|
|
|
|
return self.playlist_result(entries(), playlist_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CBCGemIE(InfoExtractor):
|
|
|
|
|
class CBCGemBaseIE(InfoExtractor):
|
|
|
|
|
_NETRC_MACHINE = 'cbcgem'
|
|
|
|
|
_GEO_COUNTRIES = ['CA']
|
|
|
|
|
|
|
|
|
|
def _call_show_api(self, item_id, display_id=None):
|
|
|
|
|
return self._download_json(
|
|
|
|
|
f'https://services.radio-canada.ca/ott/catalog/v2/gem/show/{item_id}',
|
|
|
|
|
display_id or item_id, query={'device': 'web'})
|
|
|
|
|
|
|
|
|
|
def _extract_item_info(self, item_info):
|
|
|
|
|
episode_number = None
|
|
|
|
|
title = traverse_obj(item_info, ('title', {str}))
|
|
|
|
|
if title and (mobj := re.match(r'(?P<episode>\d+)\. (?P<title>.+)', title)):
|
|
|
|
|
episode_number = int_or_none(mobj.group('episode'))
|
|
|
|
|
title = mobj.group('title')
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
'episode_number': episode_number,
|
|
|
|
|
**traverse_obj(item_info, {
|
|
|
|
|
'id': ('url', {str}),
|
|
|
|
|
'episode_id': ('url', {str}),
|
|
|
|
|
'description': ('description', {str}),
|
|
|
|
|
'thumbnail': ('images', 'card', 'url', {url_or_none}, {update_url(query=None)}),
|
|
|
|
|
'episode_number': ('episodeNumber', {int_or_none}),
|
|
|
|
|
'duration': ('metadata', 'duration', {int_or_none}),
|
|
|
|
|
'release_timestamp': ('metadata', 'airDate', {unified_timestamp}),
|
|
|
|
|
'timestamp': ('metadata', 'availabilityDate', {unified_timestamp}),
|
|
|
|
|
'age_limit': ('metadata', 'rating', {trim_str(start='C')}, {parse_age_limit}),
|
|
|
|
|
}),
|
|
|
|
|
'episode': title,
|
|
|
|
|
'title': title,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CBCGemIE(CBCGemBaseIE):
|
|
|
|
|
IE_NAME = 'gem.cbc.ca'
|
|
|
|
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
|
|
|
|
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>[0-9a-z-]+/s(?P<season>[0-9]+)[a-z][0-9]+)'
|
|
|
|
|
_TESTS = [{
|
|
|
|
|
# This is a normal, public, TV show video
|
|
|
|
|
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
|
|
|
|
@ -529,7 +565,7 @@ class CBCGemIE(InfoExtractor):
|
|
|
|
|
'description': 'md5:929868d20021c924020641769eb3e7f1',
|
|
|
|
|
'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
|
|
|
|
|
'duration': 1324,
|
|
|
|
|
'categories': ['comedy'],
|
|
|
|
|
'genres': ['Comédie et humour'],
|
|
|
|
|
'series': 'Schitt\'s Creek',
|
|
|
|
|
'season': 'Season 6',
|
|
|
|
|
'season_number': 6,
|
|
|
|
@ -537,9 +573,10 @@ class CBCGemIE(InfoExtractor):
|
|
|
|
|
'episode_number': 1,
|
|
|
|
|
'episode_id': 'schitts-creek/s06e01',
|
|
|
|
|
'upload_date': '20210618',
|
|
|
|
|
'timestamp': 1623988800,
|
|
|
|
|
'timestamp': 1623974400,
|
|
|
|
|
'release_date': '20200107',
|
|
|
|
|
'release_timestamp': 1578427200,
|
|
|
|
|
'release_timestamp': 1578355200,
|
|
|
|
|
'age_limit': 14,
|
|
|
|
|
},
|
|
|
|
|
'params': {'format': 'bv'},
|
|
|
|
|
}, {
|
|
|
|
@ -557,12 +594,13 @@ class CBCGemIE(InfoExtractor):
|
|
|
|
|
'episode_number': 1,
|
|
|
|
|
'episode': 'The Cup Runneth Over',
|
|
|
|
|
'episode_id': 'schitts-creek/s01e01',
|
|
|
|
|
'duration': 1309,
|
|
|
|
|
'categories': ['comedy'],
|
|
|
|
|
'duration': 1308,
|
|
|
|
|
'genres': ['Comédie et humour'],
|
|
|
|
|
'upload_date': '20210617',
|
|
|
|
|
'timestamp': 1623902400,
|
|
|
|
|
'release_date': '20151124',
|
|
|
|
|
'release_timestamp': 1448323200,
|
|
|
|
|
'timestamp': 1623888000,
|
|
|
|
|
'release_date': '20151123',
|
|
|
|
|
'release_timestamp': 1448236800,
|
|
|
|
|
'age_limit': 14,
|
|
|
|
|
},
|
|
|
|
|
'params': {'format': 'bv'},
|
|
|
|
|
}, {
|
|
|
|
@ -570,9 +608,7 @@ class CBCGemIE(InfoExtractor):
|
|
|
|
|
'only_matching': True,
|
|
|
|
|
}]
|
|
|
|
|
|
|
|
|
|
_GEO_COUNTRIES = ['CA']
|
|
|
|
|
_TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
|
|
|
|
|
_NETRC_MACHINE = 'cbcgem'
|
|
|
|
|
_claims_token = None
|
|
|
|
|
|
|
|
|
|
def _new_claims_token(self, email, password):
|
|
|
|
@ -634,10 +670,12 @@ class CBCGemIE(InfoExtractor):
|
|
|
|
|
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
|
video_id = self._match_id(url)
|
|
|
|
|
video_info = self._download_json(
|
|
|
|
|
f'https://services.radio-canada.ca/ott/cbc-api/v2/assets/{video_id}',
|
|
|
|
|
video_id, expected_status=426)
|
|
|
|
|
video_id, season_number = self._match_valid_url(url).group('id', 'season')
|
|
|
|
|
video_info = self._call_show_api(video_id)
|
|
|
|
|
item_info = traverse_obj(video_info, (
|
|
|
|
|
'content', ..., 'lineups', ..., 'items',
|
|
|
|
|
lambda _, v: v['url'] == video_id, any, {require('item info')}))
|
|
|
|
|
media_id = item_info['idMedia']
|
|
|
|
|
|
|
|
|
|
email, password = self._get_login_info()
|
|
|
|
|
if email and password:
|
|
|
|
@ -645,7 +683,20 @@ class CBCGemIE(InfoExtractor):
|
|
|
|
|
headers = {'x-claims-token': claims_token}
|
|
|
|
|
else:
|
|
|
|
|
headers = {}
|
|
|
|
|
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
|
|
|
|
|
|
|
|
|
|
m3u8_info = self._download_json(
|
|
|
|
|
'https://services.radio-canada.ca/media/validation/v2/',
|
|
|
|
|
video_id, headers=headers, query={
|
|
|
|
|
'appCode': 'gem',
|
|
|
|
|
'connectionType': 'hd',
|
|
|
|
|
'deviceType': 'ipad',
|
|
|
|
|
'multibitrate': 'true',
|
|
|
|
|
'output': 'json',
|
|
|
|
|
'tech': 'hls',
|
|
|
|
|
'manifestVersion': '2',
|
|
|
|
|
'manifestType': 'desktop',
|
|
|
|
|
'idMedia': media_id,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
if m3u8_info.get('errorCode') == 1:
|
|
|
|
|
self.raise_geo_restricted(countries=['CA'])
|
|
|
|
@ -671,26 +722,20 @@ class CBCGemIE(InfoExtractor):
|
|
|
|
|
fmt['preference'] = -2
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
'season_number': int_or_none(season_number),
|
|
|
|
|
**traverse_obj(video_info, {
|
|
|
|
|
'series': ('title', {str}),
|
|
|
|
|
'season_number': ('structuredMetadata', 'partofSeason', 'seasonNumber', {int_or_none}),
|
|
|
|
|
'genres': ('structuredMetadata', 'genre', ..., {str}),
|
|
|
|
|
}),
|
|
|
|
|
**self._extract_item_info(item_info),
|
|
|
|
|
'id': video_id,
|
|
|
|
|
'episode_id': video_id,
|
|
|
|
|
'formats': formats,
|
|
|
|
|
**traverse_obj(video_info, {
|
|
|
|
|
'title': ('title', {str}),
|
|
|
|
|
'episode': ('title', {str}),
|
|
|
|
|
'description': ('description', {str}),
|
|
|
|
|
'thumbnail': ('image', {url_or_none}),
|
|
|
|
|
'series': ('series', {str}),
|
|
|
|
|
'season_number': ('season', {int_or_none}),
|
|
|
|
|
'episode_number': ('episode', {int_or_none}),
|
|
|
|
|
'duration': ('duration', {int_or_none}),
|
|
|
|
|
'categories': ('category', {str}, all),
|
|
|
|
|
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
|
|
|
|
|
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
|
|
|
|
|
}),
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CBCGemPlaylistIE(InfoExtractor):
|
|
|
|
|
class CBCGemPlaylistIE(CBCGemBaseIE):
|
|
|
|
|
IE_NAME = 'gem.cbc.ca:playlist'
|
|
|
|
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
|
|
|
|
_TESTS = [{
|
|
|
|
@ -700,70 +745,35 @@ class CBCGemPlaylistIE(InfoExtractor):
|
|
|
|
|
'info_dict': {
|
|
|
|
|
'id': 'schitts-creek/s06',
|
|
|
|
|
'title': 'Season 6',
|
|
|
|
|
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
|
|
|
|
'series': 'Schitt\'s Creek',
|
|
|
|
|
'season_number': 6,
|
|
|
|
|
'season': 'Season 6',
|
|
|
|
|
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/season/perso/cbc_schitts_creek_season_06_carousel_v03.jpg?impolicy=ott&im=Resize=(_Size_)&quality=75',
|
|
|
|
|
},
|
|
|
|
|
}, {
|
|
|
|
|
'url': 'https://gem.cbc.ca/schitts-creek/s06',
|
|
|
|
|
'only_matching': True,
|
|
|
|
|
}]
|
|
|
|
|
_API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
|
match = self._match_valid_url(url)
|
|
|
|
|
season_id = match.group('id')
|
|
|
|
|
show = match.group('show')
|
|
|
|
|
show_info = self._download_json(self._API_BASE + show, season_id, expected_status=426)
|
|
|
|
|
season = int(match.group('season'))
|
|
|
|
|
|
|
|
|
|
season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None)
|
|
|
|
|
|
|
|
|
|
if season_info is None:
|
|
|
|
|
raise ExtractorError(f'Couldn\'t find season {season} of {show}')
|
|
|
|
|
def _entries(self, season_info):
|
|
|
|
|
for episode in traverse_obj(season_info, ('items', lambda _, v: v['url'])):
|
|
|
|
|
yield self.url_result(
|
|
|
|
|
f'https://gem.cbc.ca/media/{episode["url"]}', CBCGemIE,
|
|
|
|
|
**self._extract_item_info(episode))
|
|
|
|
|
|
|
|
|
|
episodes = []
|
|
|
|
|
for episode in season_info['assets']:
|
|
|
|
|
episodes.append({
|
|
|
|
|
'_type': 'url_transparent',
|
|
|
|
|
'ie_key': 'CBCGem',
|
|
|
|
|
'url': 'https://gem.cbc.ca/media/' + episode['id'],
|
|
|
|
|
'id': episode['id'],
|
|
|
|
|
'title': episode.get('title'),
|
|
|
|
|
'description': episode.get('description'),
|
|
|
|
|
'thumbnail': episode.get('image'),
|
|
|
|
|
'series': episode.get('series'),
|
|
|
|
|
'season_number': episode.get('season'),
|
|
|
|
|
'season': season_info['title'],
|
|
|
|
|
'season_id': season_info.get('id'),
|
|
|
|
|
'episode_number': episode.get('episode'),
|
|
|
|
|
'episode': episode.get('title'),
|
|
|
|
|
'episode_id': episode['id'],
|
|
|
|
|
'duration': episode.get('duration'),
|
|
|
|
|
'categories': [episode.get('category')],
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
thumbnail = None
|
|
|
|
|
tn_uri = season_info.get('image')
|
|
|
|
|
# the-national was observed to use a "data:image/png;base64"
|
|
|
|
|
# URI for their 'image' value. The image was 1x1, and is
|
|
|
|
|
# probably just a placeholder, so it is ignored.
|
|
|
|
|
if tn_uri is not None and not tn_uri.startswith('data:'):
|
|
|
|
|
thumbnail = tn_uri
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
|
season_id, show, season = self._match_valid_url(url).group('id', 'show', 'season')
|
|
|
|
|
show_info = self._call_show_api(show, display_id=season_id)
|
|
|
|
|
season_info = traverse_obj(show_info, (
|
|
|
|
|
'content', ..., 'lineups',
|
|
|
|
|
lambda _, v: v['seasonNumber'] == int(season), any, {require('season info')}))
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
'_type': 'playlist',
|
|
|
|
|
'entries': episodes,
|
|
|
|
|
'id': season_id,
|
|
|
|
|
'title': season_info['title'],
|
|
|
|
|
'description': season_info.get('description'),
|
|
|
|
|
'thumbnail': thumbnail,
|
|
|
|
|
'series': show_info.get('title'),
|
|
|
|
|
'season_number': season_info.get('season'),
|
|
|
|
|
'season': season_info['title'],
|
|
|
|
|
}
|
|
|
|
|
return self.playlist_result(
|
|
|
|
|
self._entries(season_info), season_id,
|
|
|
|
|
**traverse_obj(season_info, {
|
|
|
|
|
'title': ('title', {str}),
|
|
|
|
|
'season': ('title', {str}),
|
|
|
|
|
'season_number': ('seasonNumber', {int_or_none}),
|
|
|
|
|
}), series=traverse_obj(show_info, ('title', {str})))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class CBCGemLiveIE(InfoExtractor):
|
|
|
|
|