Refactor playlist support for arte.tv to use the API endpoint instead of parsing HTML

pull/13191/head
1100101 2 days ago
parent 57ae7d34c3
commit 1257a8bd66

@@ -309,42 +309,40 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
}, },
}] }]
def _entries(self, playlist_data, playlist_id): def _entries(self, season_ids, lang, playlist_id):
playlist_item_filter = lambda _, v: re.match(rf'collection_(?:videos|subcollection)_{playlist_id}', v['code']) for season_id in season_ids:
collections = traverse_obj(playlist_data, season_data = self._download_json(f'{self._API_BASE}/playlist/{lang}/{season_id}', season_id, headers={
('data', 'x-validated-age': '18',
'zones', })
playlist_item_filter,
'content', collection = traverse_obj(season_data, ('data', 'attributes', 'items'))
'data',
...)) for video in collection:
for video in collections:
yield { yield {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'https://www.arte.tv' + video['url'], 'url': traverse_obj(video, ('link', 'url')),
'ie_key': ArteTVIE.ie_key(), 'ie_key': ArteTVIE.ie_key(),
'id': video['id'], 'id': video['providerId'],
'title': video.get('title'), 'title': video.get('title'),
'alt_title': video.get('subtitle'), 'alt_title': video.get('subtitle'),
'duration': int_or_none(traverse_obj(video, ('duration'))), 'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))),
'age_limit': int_or_none(traverse_obj(video, 'ageRating')), 'age_limit': int_or_none(traverse_obj(video, 'ageRating')),
} }
def _real_extract(self, url): def _real_extract(self, url):
_API_TOKEN = 'Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA'
lang, playlist_id = self._match_valid_url(url).group('lang', 'id') lang, playlist_id = self._match_valid_url(url).group('lang', 'id')
webpage = self._download_webpage(url, playlist_id)
unescape_func = lambda jstring: jstring.replace('\\"', '"').replace('\\\\', '\\') playlist_info = self._download_json(f'https://api.arte.tv/api/opa/v3/programs/{lang}/{playlist_id}', playlist_id,
json_data = self._search_json(r'\$L23.+?', webpage, 'series data', headers={
playlist_id, 'Authorization': f'Bearer {_API_TOKEN}',
end_pattern=r'\],\[\[', })
transform_source=unescape_func)
return self.playlist_result(self._entries(json_data, playlist_id), season_ids = traverse_obj(playlist_info, ('programs', ..., 'children', (lambda _, v: v['catalogType'] == 'SEASON'), 'programId'))
return self.playlist_result(self._entries(season_ids, lang, playlist_id),
playlist_id, playlist_id,
traverse_obj(json_data, ('data', 'metadata', 'title')), traverse_obj(playlist_info, ('programs', ..., 'title')),
traverse_obj(json_data, ('data', 'metadata', 'description'))) traverse_obj(playlist_info, ('programs', ..., 'shortDescription')))
class ArteTVCategoryIE(ArteTVBaseIE): class ArteTVCategoryIE(ArteTVBaseIE):

Loading…
Cancel
Save