diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 379e64548c..f7e3f25c3b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1065,6 +1065,7 @@ from .loom import ( from .lovehomeporn import LoveHomePornIE from .lrt import ( LRTVODIE, + LRTRadioIE, LRTStreamIE, ) from .lsm import ( diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py index 1a0b6da230..e50194f88b 100644 --- a/yt_dlp/extractor/lrt.py +++ b/yt_dlp/extractor/lrt.py @@ -2,8 +2,11 @@ from .common import InfoExtractor from ..utils import ( clean_html, merge_dicts, + str_or_none, traverse_obj, + unified_timestamp, url_or_none, + urljoin, ) @@ -80,7 +83,7 @@ class LRTVODIE(LRTBaseIE): }] def _real_extract(self, url): - path, video_id = self._match_valid_url(url).groups() + path, video_id = self._match_valid_url(url).group('path', 'id') webpage = self._download_webpage(url, video_id) media_url = self._extract_js_var(webpage, 'main_url', path) @@ -106,3 +109,42 @@ class LRTVODIE(LRTBaseIE): } return merge_dicts(clean_info, jw_data, json_ld_data) + + +class LRTRadioIE(LRTBaseIE): + _VALID_URL = r'https?://(?:www\.)?lrt\.lt/radioteka/irasas/(?P\d+)/(?P[^?#/]+)' + _TESTS = [{ + # m3u8 download + 'url': 'https://www.lrt.lt/radioteka/irasas/2000359728/nemarios-eiles-apie-pragarus-ir-skaistyklas-su-aiste-kiltinaviciute', + 'info_dict': { + 'id': '2000359728', + 'ext': 'm4a', + 'title': 'Nemarios eilės: apie pragarus ir skaistyklas su Aiste Kiltinavičiūte', + 'description': 'md5:5eee9a0e86a55bf547bd67596204625d', + 'timestamp': 1726143120, + 'upload_date': '20240912', + 'tags': 'count:5', + 'thumbnail': r're:https?://.+/.+\.jpe?g', + 'categories': ['Daiktiniai įrodymai'], + }, + }, { + 'url': 'https://www.lrt.lt/radioteka/irasas/2000304654/vakaras-su-knyga-svetlana-aleksijevic-cernobylio-malda-v-dalis?season=%2Fmediateka%2Faudio%2Fvakaras-su-knyga%2F2023', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id, path = self._match_valid_url(url).group('id', 'path') + media = self._download_json( + 'https://www.lrt.lt/radioteka/api/media', video_id, + query={'url': f'/mediateka/irasas/{video_id}/{path}'}) + + return traverse_obj(media, { + 'id': ('id', {int}, {str_or_none}), + 'title': ('title', {str}), + 'tags': ('tags', ..., 'name', {str}), + 'categories': ('playlist_item', 'category', {str}, filter, all, filter), + 'description': ('content', {clean_html}, {str}), + 'timestamp': ('date', {lambda x: x.replace('.', '/')}, {unified_timestamp}), + 'thumbnail': ('playlist_item', 'image', {urljoin('https://www.lrt.lt')}), + 'formats': ('playlist_item', 'file', {lambda x: self._extract_m3u8_formats(x, video_id)}), + })