From 8cb037c0b06c2815080f87d61ea2e95c412785fc Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Fri, 12 Sep 2025 05:59:54 +0900 Subject: [PATCH] [ie/smotrim] Rework extractors (#14200) Closes #9372, Closes #11804, Closes #13900 Authored by: doe1080, swayll Co-authored-by: Nikolay Fedorov <40500428+swayll@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 9 +- yt_dlp/extractor/rutv.py | 191 --------------- yt_dlp/extractor/smotrim.py | 422 ++++++++++++++++++++++++++++---- yt_dlp/extractor/vesti.py | 119 --------- 4 files changed, 386 insertions(+), 355 deletions(-) delete mode 100644 yt_dlp/extractor/rutv.py delete mode 100644 yt_dlp/extractor/vesti.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index f08a1aaab4..651168143c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1779,7 +1779,6 @@ from .rutube import ( RutubePlaylistIE, RutubeTagsIE, ) -from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import ( RuvIE, @@ -1877,7 +1876,12 @@ from .skynewsau import SkyNewsAUIE from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE -from .smotrim import SmotrimIE +from .smotrim import ( + SmotrimAudioIE, + SmotrimIE, + SmotrimLiveIE, + SmotrimPlaylistIE, +) from .snapchat import SnapchatSpotlightIE from .snotr import SnotrIE from .softwhiteunderbelly import SoftWhiteUnderbellyIE @@ -2284,7 +2288,6 @@ from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veo import VeoIE -from .vesti import VestiIE from .vevo import ( VevoIE, VevoPlaylistIE, diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py deleted file mode 100644 index 11270a1f2c..0000000000 --- a/yt_dlp/extractor/rutv.py +++ /dev/null @@ -1,191 +0,0 @@ -import re - -from .common import InfoExtractor -from ..utils import ExtractorError, int_or_none, str_to_int - - -class RUTVIE(InfoExtractor): - IE_DESC = 'RUTV.RU' - _VALID_URL = r'''(?x) - https?:// - (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/ - (?P - flash\d+v/container\.swf\?id=| - iframe/(?Pswf|video|live)/id/| - index/iframe/cast_id/ - ) - (?P\d+) - ''' - _EMBED_REGEX = [ - r']+?src=(["\'])(?Phttps?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', - r']+?property=(["\'])og:video\1[^>]+?content=(["\'])(?Phttps?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', - ] - - _TESTS = [{ - 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', - 'info_dict': { - 'id': '774471', - 'ext': 'mp4', - 'title': 'Монологи на все времена. Концерт', - 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', - 'duration': 2906, - 'thumbnail': r're:https?://cdn-st2\.smotrim\.ru/.+\.jpg', - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', - 'info_dict': { - 'id': '774016', - 'ext': 'mp4', - 'title': 'Чужой в семье Сталина', - 'description': '', - 'duration': 2539, - }, - 'skip': 'Invalid URL', - }, { - 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', - 'info_dict': { - 'id': '766888', - 'ext': 'mp4', - 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', - 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', - 'duration': 279, - 'thumbnail': r're:https?://cdn-st2\.smotrim\.ru/.+\.jpg', - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', - 'info_dict': { - 'id': '771852', - 'ext': 'mp4', - 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', - 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', - 'duration': 3096, - 'thumbnail': r're:https?://cdn-st2\.smotrim\.ru/.+\.jpg', - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', - 'info_dict': { - 'id': '51499', - 'ext': 'flv', - 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', - 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', - }, - 'skip': 'Invalid URL', - }, { - 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/', - 'info_dict': { - 'id': '21', - 'ext': 'mp4', - 'title': str, - 'is_live': True, - }, - 'skip': 'Invalid URL', - }, { - 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/', - 'only_matching': True, - }] - _WEBPAGE_TESTS = [{ - 'url': 'http://istoriya-teatra.ru/news/item/f00/s05/n0000545/index.shtml', - 'info_dict': { - 'id': '1952012', - 'ext': 'mp4', - 'title': 'Новости культуры. Эфир от 10.10.2019 (23:30). Театр Сатиры отмечает день рождения премьерой', - 'description': 'md5:fced27112ff01ff8fc4a452fc088bad6', - 'duration': 191, - 'thumbnail': r're:https?://cdn-st2\.smotrim\.ru/.+\.jpg', - }, - 'params': {'skip_download': 'm3u8'}, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - video_path = mobj.group('path') - - if re.match(r'flash\d+v', video_path): - video_type = 'video' - elif video_path.startswith('iframe'): - video_type = mobj.group('type') - if video_type == 'swf': - video_type = 'video' - elif video_path.startswith('index/iframe/cast_id'): - video_type = 'live' - - is_live = video_type == 'live' - - json_data = self._download_json( - 'http://player.vgtrk.com/iframe/data{}/id/{}'.format('live' if is_live else 'video', video_id), - video_id, 'Downloading JSON') - - if json_data['errors']: - raise ExtractorError('{} said: {}'.format(self.IE_NAME, json_data['errors']), expected=True) - - playlist = json_data['data']['playlist'] - medialist = playlist['medialist'] - media = medialist[0] - - if media['errors']: - raise ExtractorError('{} said: {}'.format(self.IE_NAME, media['errors']), expected=True) - - view_count = int_or_none(playlist.get('count_views')) - priority_transport = playlist['priority_transport'] - - thumbnail = media['picture'] - width = int_or_none(media['width']) - height = int_or_none(media['height']) - description = media['anons'] - title = media['title'] - duration = int_or_none(media.get('duration')) - - formats = [] - subtitles = {} - - for transport, links in media['sources'].items(): - for quality, url in links.items(): - preference = -1 if priority_transport == transport else -2 - if transport == 'rtmp': - mobj = re.search(r'^(?Prtmp://[^/]+/(?P.+))/(?P.+)$', url) - if not mobj: - continue - fmt = { - 'url': mobj.group('url'), - 'play_path': mobj.group('playpath'), - 'app': mobj.group('app'), - 'page_url': 'http://player.rutv.ru', - 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22', - 'rtmp_live': True, - 'ext': 'flv', - 'vbr': str_to_int(quality), - } - elif transport == 'm3u8': - fmt, subs = self._extract_m3u8_formats_and_subtitles( - url, video_id, 'mp4', quality=preference, m3u8_id='hls') - formats.extend(fmt) - self._merge_subtitles(subs, target=subtitles) - continue - else: - fmt = { - 'url': url, - } - fmt.update({ - 'width': int_or_none(quality, default=height, invscale=width, scale=height), - 'height': int_or_none(quality, default=height), - 'format_id': f'{transport}-{quality}', - 'source_preference': preference, - }) - formats.append(fmt) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'view_count': view_count, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - 'is_live': is_live, - '_format_sort_fields': ('source', ), - } diff --git a/yt_dlp/extractor/smotrim.py b/yt_dlp/extractor/smotrim.py index d3f1b695b3..098d369daf 100644 --- a/yt_dlp/extractor/smotrim.py +++ b/yt_dlp/extractor/smotrim.py @@ -1,65 +1,403 @@ +import functools +import json +import re +import urllib.parse + from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + OnDemandPagedList, + clean_html, + determine_ext, + extract_attributes, + int_or_none, + parse_iso8601, + str_or_none, + unescapeHTML, + url_or_none, + urljoin, +) +from ..utils.traversal import ( + find_element, + find_elements, + require, + traverse_obj, +) + + +class SmotrimBaseIE(InfoExtractor): + _BASE_URL = 'https://smotrim.ru' + _GEO_BYPASS = False + _GEO_COUNTRIES = ['RU'] + + def _extract_from_smotrim_api(self, typ, item_id): + path = f'data{typ.replace("-", "")}/{"uid" if typ == "live" else "id"}' + data = self._download_json( + f'https://player.smotrim.ru/iframe/{path}/{item_id}/sid/smotrim', item_id) + media = traverse_obj(data, ('data', 'playlist', 'medialist', -1, {dict})) + if traverse_obj(media, ('locked', {bool})): + self.raise_login_required() + if error_msg := traverse_obj(media, ('errors', {clean_html})): + self.raise_geo_restricted(error_msg, countries=self._GEO_COUNTRIES) + + webpage_url = traverse_obj(data, ('data', 'template', 'share_url', {url_or_none})) + webpage = self._download_webpage(webpage_url, item_id) + common = { + 'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None), + **traverse_obj(media, { + 'id': ('id', {str_or_none}), + 'title': (('episodeTitle', 'title'), {clean_html}, filter, any), + 'channel_id': ('channelId', {str_or_none}), + 'description': ('anons', {clean_html}, filter), + 'season': ('season', {clean_html}, filter), + 'series': (('brand_title', 'brandTitle'), {clean_html}, filter, any), + 'series_id': ('brand_id', {str_or_none}), + }), + } + + if typ == 'audio': + bookmark = self._search_json( + r'class="bookmark"[^>]+value\s*=\s*"', webpage, + 'bookmark', item_id, default={}, transform_source=unescapeHTML) + + metadata = { + 'vcodec': 'none', + **common, + **traverse_obj(media, { + 'ext': ('audio_url', {determine_ext(default_ext='mp3')}), + 'duration': ('duration', {int_or_none}), + 'url': ('audio_url', {url_or_none}), + }), + **traverse_obj(bookmark, { + 'title': ('subtitle', {clean_html}), + 'timestamp': ('published', {parse_iso8601}), + }), + } + elif typ == 'audio-live': + metadata = { + 'ext': 'mp3', + 'url': traverse_obj(media, ('source', 'auto', {url_or_none})), + 'vcodec': 'none', + **common, + } + else: + formats, subtitles = [], {} + for m3u8_url in traverse_obj(media, ( + 'sources', 'm3u8', {dict.values}, ..., {url_or_none}, + )): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + m3u8_url, item_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + metadata = { + 'formats': formats, + 'subtitles': subtitles, + **self._search_json_ld(webpage, item_id), + **common, + } + return { + 'age_limit': traverse_obj(data, ('data', 'age_restrictions', {int_or_none})), + 'is_live': typ in ('audio-live', 'live'), + 'tags': traverse_obj(webpage, ( + {find_elements(cls='tags-list__link')}, ..., {clean_html}, filter, all, filter)), + 'webpage_url': webpage_url, + **metadata, + } -class SmotrimIE(InfoExtractor): - _VALID_URL = r'https?://smotrim\.ru/(?Pbrand|video|article|live)/(?P[0-9]+)' - _TESTS = [{ # video + +class SmotrimIE(SmotrimBaseIE): + IE_NAME = 'smotrim' + _VALID_URL = r'(?:https?:)?//(?:(?:player|www)\.)?smotrim\.ru(?:/iframe)?/video(?:/id)?/(?P\d+)' + _EMBED_REGEX = [fr']+\bsrc=["\'](?P{_VALID_URL})'] + _TESTS = [{ 'url': 'https://smotrim.ru/video/1539617', - 'md5': 'b1923a533c8cab09679789d720d0b1c5', 'info_dict': { 'id': '1539617', 'ext': 'mp4', - 'title': 'Полиглот. Китайский с нуля за 16 часов! Урок №16', - 'description': '', + 'title': 'Урок №16', + 'duration': 2631, + 'series': 'Полиглот. Китайский с нуля за 16 часов!', + 'series_id': '60562', + 'tags': 'mincount:6', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + 'timestamp': 1466771100, + 'upload_date': '20160624', + 'view_count': int, + }, + }, { + 'url': 'https://player.smotrim.ru/iframe/video/id/2988590', + 'info_dict': { + 'id': '2988590', + 'ext': 'mp4', + 'title': 'Трейлер', + 'age_limit': 16, + 'description': 'md5:6af7e68ecf4ed7b8ff6720d20c4da47b', + 'duration': 30, + 'series': 'Мы в разводе', + 'series_id': '71624', + 'tags': 'mincount:5', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + 'timestamp': 1750670040, + 'upload_date': '20250623', + 'view_count': int, + 'webpage_url': 'https://smotrim.ru/video/2988590', }, - 'add_ie': ['RUTV'], - }, { # article (geo-restricted? plays fine from the US and JP) + }] + _WEBPAGE_TESTS = [{ 'url': 'https://smotrim.ru/article/2813445', - 'md5': 'e0ac453952afbc6a2742e850b4dc8e77', 'info_dict': { 'id': '2431846', 'ext': 'mp4', - 'title': 'Новости культуры. Съёмки первой программы "Большие и маленькие"', - 'description': 'md5:94a4a22472da4252bf5587a4ee441b99', + 'title': 'Съёмки первой программы "Большие и маленькие"', + 'description': 'md5:446c9a5d334b995152a813946353f447', + 'duration': 240, + 'series': 'Новости культуры', + 'series_id': '19725', + 'tags': 'mincount:6', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + 'timestamp': 1656054443, + 'upload_date': '20220624', + 'view_count': int, + 'webpage_url': 'https://smotrim.ru/video/2431846', }, - 'add_ie': ['RUTV'], - }, { # brand, redirect - 'url': 'https://smotrim.ru/brand/64356', - 'md5': '740472999ccff81d7f6df79cecd91c18', + }, { + 'url': 'https://www.vesti.ru/article/4642878', 'info_dict': { - 'id': '2354523', + 'id': '3007209', 'ext': 'mp4', - 'title': 'Большие и маленькие. Лучшее. 4-й выпуск', - 'description': 'md5:84089e834429008371ea41ea3507b989', + 'title': 'Иностранные мессенджеры используют не только мошенники, но и вербовщики', + 'description': 'md5:74ab625a0a89b87b2e0ed98d6391b182', + 'duration': 265, + 'series': 'Вести. Дежурная часть', + 'series_id': '5204', + 'tags': 'mincount:6', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + 'timestamp': 1754756280, + 'upload_date': '20250809', + 'view_count': int, + 'webpage_url': 'https://smotrim.ru/video/3007209', }, - 'add_ie': ['RUTV'], - }, { # live - 'url': 'https://smotrim.ru/live/19201', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + return self._extract_from_smotrim_api('video', video_id) + + +class SmotrimAudioIE(SmotrimBaseIE): + IE_NAME = 'smotrim:audio' + _VALID_URL = r'https?://(?:(?:player|www)\.)?smotrim\.ru(?:/iframe)?/audio(?:/id)?/(?P\d+)' + _TESTS = [{ + 'url': 'https://smotrim.ru/audio/2573986', + 'md5': 'e28d94c20da524e242b2d00caef41a8e', + 'info_dict': { + 'id': '2573986', + 'ext': 'mp3', + 'title': 'Радиоспектакль', + 'description': 'md5:4bcaaf7d532bc78f76e478fad944e388', + 'duration': 3072, + 'series': 'Морис Леблан. Арсен Люпен, джентльмен-грабитель', + 'series_id': '66461', + 'tags': 'mincount:7', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + 'timestamp': 1624884358, + 'upload_date': '20210628', + }, + }, { + 'url': 'https://player.smotrim.ru/iframe/audio/id/2860468', + 'md5': '5a6bc1fa24c7142958be1ad9cfae58a8', + 'info_dict': { + 'id': '2860468', + 'ext': 'mp3', + 'title': 'Колобок и музыкальная игра "Терем-теремок"', + 'duration': 1501, + 'series': 'Веселый колобок', + 'series_id': '68880', + 'tags': 'mincount:4', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + 'timestamp': 1755925800, + 'upload_date': '20250823', + 'webpage_url': 'https://smotrim.ru/audio/2860468', + }, + }] + + def _real_extract(self, url): + audio_id = self._match_id(url) + + return self._extract_from_smotrim_api('audio', audio_id) + + +class SmotrimLiveIE(SmotrimBaseIE): + IE_NAME = 'smotrim:live' + _VALID_URL = r'''(?x: + (?:https?:)?// + (?:(?:(?:test)?player|www)\.)? + (?: + smotrim\.ru| + vgtrk\.com + ) + (?:/iframe)?/ + (?P + channel| + (?:audio-)?live + ) + (?:/u?id)?/(?P[\da-f-]+) + )''' + _EMBED_REGEX = [fr']+\bsrc=["\'](?P{_VALID_URL})'] + _TESTS = [{ + 'url': 'https://smotrim.ru/channel/76', + 'info_dict': { + 'id': '1661', + 'ext': 'mp4', + 'title': str, + 'channel_id': '76', + 'description': 'Смотрим прямой эфир «Москва 24»', + 'display_id': '76', + 'live_status': 'is_live', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + 'timestamp': int, + 'upload_date': str, + }, + 'params': {'skip_download': 'Livestream'}, + }, { + # Radio + 'url': 'https://smotrim.ru/channel/81', + 'info_dict': { + 'id': '81', + 'ext': 'mp3', + 'title': str, + 'channel_id': '81', + 'live_status': 'is_live', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + }, + 'params': {'skip_download': 'Livestream'}, + }, { + # Sometimes geo-restricted to Russia + 'url': 'https://player.smotrim.ru/iframe/live/uid/381308c7-a066-4c4f-9656-83e2e792a7b4', 'info_dict': { 'id': '19201', 'ext': 'mp4', - # this looks like a TV channel name - 'title': 'Россия Культура. Прямой эфир', - 'description': '', + 'title': str, + 'channel_id': '4', + 'description': 'Смотрим прямой эфир «Россия К»', + 'display_id': '381308c7-a066-4c4f-9656-83e2e792a7b4', + 'live_status': 'is_live', + 'thumbnail': r're:https?://cdn-st\d+\.smotrim\.ru/.+\.(?:jpg|png)', + 'timestamp': int, + 'upload_date': str, + 'webpage_url': 'https://smotrim.ru/channel/4', }, - 'add_ie': ['RUTV'], + 'params': {'skip_download': 'Livestream'}, + }, { + 'url': 'https://smotrim.ru/live/19201', + 'only_matching': True, + }, { + 'url': 'https://player.smotrim.ru/iframe/audio-live/id/81', + 'only_matching': True, + }, { + 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201', + 'only_matching': True, }] def _real_extract(self, url): - video_id, typ = self._match_valid_url(url).group('id', 'type') - rutv_type = 'video' - if typ not in ('video', 'live'): - webpage = self._download_webpage(url, video_id, f'Resolving {typ} link') - # there are two cases matching regex: - # 1. "embedUrl" in JSON LD (/brand/) - # 2. "src" attribute from iframe (/article/) - video_id = self._search_regex( - r'"https://player.smotrim.ru/iframe/video/id/(?P\d+)/', - webpage, 'video_id', default=None) - if not video_id: - raise ExtractorError('There are no video in this page.', expected=True) - elif typ == 'live': - rutv_type = 'live' - - return self.url_result(f'https://player.vgtrk.com/iframe/{rutv_type}/id/{video_id}') + typ, display_id = self._match_valid_url(url).group('type', 'id') + + if typ == 'live' and re.fullmatch(r'[0-9]+', display_id): + url = self._request_webpage(url, display_id).url + typ = self._match_valid_url(url).group('type') + + if typ == 'channel': + webpage = self._download_webpage(url, display_id) + src_url = traverse_obj(webpage, (( + ({find_element(cls='main-player__frame', html=True)}, {extract_attributes}, 'src'), + ({find_element(cls='audio-play-button', html=True)}, + {extract_attributes}, 'value', {urllib.parse.unquote}, {json.loads}, 'source'), + ), any, {self._proto_relative_url}, {url_or_none}, {require('src URL')})) + typ, video_id = self._match_valid_url(src_url).group('type', 'id') + else: + video_id = display_id + + return { + 'display_id': display_id, + **self._extract_from_smotrim_api(typ, video_id), + } + + +class SmotrimPlaylistIE(SmotrimBaseIE): + IE_NAME = 'smotrim:playlist' + _PAGE_SIZE = 15 + _VALID_URL = r'https?://smotrim\.ru/(?Pbrand|podcast)/(?P\d+)/?(?P[\w-]+)?' + _TESTS = [{ + # Video + 'url': 'https://smotrim.ru/brand/64356', + 'info_dict': { + 'id': '64356', + 'title': 'Большие и маленькие', + }, + 'playlist_mincount': 55, + }, { + # Video, season + 'url': 'https://smotrim.ru/brand/65293/3-sezon', + 'info_dict': { + 'id': '65293', + 'title': 'Спасская', + 'season': '3 сезон', + }, + 'playlist_count': 16, + }, { + # Audio + 'url': 'https://smotrim.ru/brand/68880', + 'info_dict': { + 'id': '68880', + 'title': 'Веселый колобок', + }, + 'playlist_mincount': 156, + }, { + # Podcast + 'url': 'https://smotrim.ru/podcast/8021', + 'info_dict': { + 'id': '8021', + 'title': 'Сила звука', + }, + 'playlist_mincount': 27, + }] + + def _fetch_page(self, endpoint, key, playlist_id, page): + page += 1 + items = self._download_json( + f'{self._BASE_URL}/api/{endpoint}', playlist_id, + f'Downloading page {page}', query={ + key: playlist_id, + 'limit': self._PAGE_SIZE, + 'page': page, + }, + ) + + for link in traverse_obj(items, ('contents', -1, 'list', ..., 'link', {str})): + yield self.url_result(urljoin(self._BASE_URL, link)) + + def _real_extract(self, url): + playlist_type, playlist_id, season = self._match_valid_url(url).group('type', 'id', 'season') + key = 'rubricId' if playlist_type == 'podcast' else 'brandId' + webpage = self._download_webpage(url, playlist_id) + playlist_title = self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None) + + if season: + return self.playlist_from_matches(traverse_obj(webpage, ( + {find_elements(tag='a', attr='href', value=r'/video/\d+', html=True, regex=True)}, + ..., {extract_attributes}, 'href', {str}, + )), playlist_id, playlist_title, season=traverse_obj(webpage, ( + {find_element(cls='seasons__item seasons__item--selected')}, {clean_html}, + )), ie=SmotrimIE, getter=urljoin(self._BASE_URL)) + + if traverse_obj(webpage, ( + {find_element(cls='brand-main-item__videos')}, {clean_html}, filter, + )): + endpoint = 'videos' + else: + endpoint = 'audios' + + return self.playlist_result(OnDemandPagedList( + functools.partial(self._fetch_page, endpoint, key, playlist_id), self._PAGE_SIZE), playlist_id, playlist_title) diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py deleted file mode 100644 index 844041a61a..0000000000 --- a/yt_dlp/extractor/vesti.py +++ /dev/null @@ -1,119 +0,0 @@ -import re - -from .common import InfoExtractor -from .rutv import RUTVIE -from ..utils import ExtractorError - - -class VestiIE(InfoExtractor): - _WORKING = False - IE_DESC = 'Вести.Ru' - _VALID_URL = r'https?://(?:.+?\.)?vesti\.ru/(?P.+)' - - _TESTS = [ - { - 'url': 'http://www.vesti.ru/videos?vid=575582&cid=1', - 'info_dict': { - 'id': '765035', - 'ext': 'mp4', - 'title': 'Вести.net: биткоины в России не являются законными', - 'description': 'md5:d4bb3859dc1177b28a94c5014c35a36b', - 'duration': 302, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://www.vesti.ru/doc.html?id=1349233', - 'info_dict': { - 'id': '773865', - 'ext': 'mp4', - 'title': 'Участники митинга штурмуют Донецкую областную администрацию', - 'description': 'md5:1a160e98b3195379b4c849f2f4958009', - 'duration': 210, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://www.vesti.ru/only_video.html?vid=576180', - 'info_dict': { - 'id': '766048', - 'ext': 'mp4', - 'title': 'США заморозило, Британию затопило', - 'description': 'md5:f0ed0695ec05aed27c56a70a58dc4cc1', - 'duration': 87, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://hitech.vesti.ru/news/view/id/4000', - 'info_dict': { - 'id': '766888', - 'ext': 'mp4', - 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', - 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', - 'duration': 279, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, - { - 'url': 'http://sochi2014.vesti.ru/video/index/video_id/766403', - 'info_dict': { - 'id': '766403', - 'ext': 'mp4', - 'title': 'XXII зимние Олимпийские игры. Российские хоккеисты стартовали на Олимпиаде с победы', - 'description': 'md5:55805dfd35763a890ff50fa9e35e31b3', - 'duration': 271, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - 'skip': 'Blocked outside Russia', - }, - { - 'url': 'http://sochi2014.vesti.ru/live/play/live_id/301', - 'info_dict': { - 'id': '51499', - 'ext': 'flv', - 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', - 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', - }, - 'params': { - # rtmp download - 'skip_download': True, - }, - 'skip': 'Translation has finished', - }, - ] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - page = self._download_webpage(url, video_id, 'Downloading page') - - mobj = re.search( - r']+?property="og:video"[^>]+?content="http://www\.vesti\.ru/i/flvplayer_videoHost\.swf\?vid=(?P\d+)', - page) - if mobj: - video_id = mobj.group('id') - page = self._download_webpage(f'http://www.vesti.ru/only_video.html?vid={video_id}', video_id, - 'Downloading video page') - - rutv_url = RUTVIE._extract_url(page) - if rutv_url: - return self.url_result(rutv_url, 'RUTV') - - raise ExtractorError('No video found', expected=True)