From aa060addbd3b9ac4bbffe13b40f79560b673209c Mon Sep 17 00:00:00 2001 From: e2dk4r <43293320+e2dk4r@users.noreply.github.com> Date: Mon, 30 Dec 2024 22:56:00 +0300 Subject: [PATCH] [ie/puhutv] Fix extracting playlist This commit fixes the extraction of playlists or episodes from *-detay pages (e.g. https://puhutv.com/deha-detay). This data was extracted from the Android app. --- yt_dlp/extractor/puhutv.py | 54 ++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py index b62050ecd5..8738a70050 100644 --- a/yt_dlp/extractor/puhutv.py +++ b/yt_dlp/extractor/puhutv.py @@ -6,6 +6,7 @@ from ..utils import ( int_or_none, parse_resolution, str_or_none, + traverse_obj, try_get, unified_timestamp, url_or_none, @@ -18,20 +19,21 @@ class PuhuTVIE(InfoExtractor): IE_NAME = 'puhutv' _TESTS = [{ # film - 'url': 'https://puhutv.com/sut-kardesler-izle', - 'md5': 'a347470371d56e1585d1b2c8dab01c96', + 'url': 'https://puhutv.com/bi-kucuk-eylul-meselesi-izle', + 'md5': '4de98170ccb84c05779b1f046b3c86f8', 'info_dict': { - 'id': '5085', - 'display_id': 'sut-kardesler', + 'id': '11909', + 'display_id': 'bi-kucuk-eylul-meselesi', 'ext': 'mp4', - 'title': 'Süt Kardeşler', - 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa', + 'title': 'Bi Küçük Eylül Meselesi', + 'description': 'md5:c2ab964c6542b7b26acacca0773adce2', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 4832.44, - 'creator': 'Arzu Film', - 'timestamp': 1561062602, + 'duration': 6176.96, + 'creator': 'Ay Yapım', + 'creators': ['Ay Yapım'], + 'timestamp': 1561062749, 'upload_date': '20190620', - 'release_year': 1976, + 'release_year': 2014, 'view_count': int, 'tags': list, }, @@ -181,7 +183,7 @@ class PuhuTVSerieIE(InfoExtractor): 'playlist_mincount': 205, }, { # a film detail page which is using same url with serie page - 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', + 'url': 'https://puhutv.com/bizim-icin-sampiyon-detay', 
'only_matching': True, }] @@ -194,24 +196,19 @@ class PuhuTVSerieIE(InfoExtractor): has_more = True while has_more is True: season = self._download_json( - f'https://galadriel.puhutv.com/seasons/{season_id}', + f'https://appservice.puhutv.com/api/seasons/{season_id}/episodes?v=2', season_id, f'Downloading page {page}', query={ 'page': page, - 'per': 40, - }) - episodes = season.get('episodes') - if isinstance(episodes, list): - for ep in episodes: - slug_path = str_or_none(ep.get('slugPath')) - if not slug_path: - continue - video_id = str_or_none(int_or_none(ep.get('id'))) - yield self.url_result( - f'https://puhutv.com/{slug_path}', - ie=PuhuTVIE.ie_key(), video_id=video_id, - video_title=ep.get('name') or ep.get('eventLabel')) + 'per': 100, + })['data'] + + for episode in traverse_obj(season, ('episodes', lambda _, v: v.get('slug') or v['assets'][0]['slug'])): + slug = episode.get('slug') or episode['assets'][0]['slug'] + yield self.url_result( + f'https://puhutv.com/{slug}', PuhuTVIE, episode.get('id'), episode.get('name')) + page += 1 - has_more = season.get('hasMore') + has_more = traverse_obj(season, 'has_more') def _real_extract(self, url): playlist_id = self._match_id(url) @@ -226,7 +223,6 @@ class PuhuTVSerieIE(InfoExtractor): self._extract_entries(seasons), playlist_id, info.get('name')) # For films, these are using same url with series - video_id = info.get('slug') or info['assets'][0]['slug'] + video_id = (info['slug'] or info['assets'][0]['slug']).removesuffix('-detay').removesuffix('-izle') return self.url_result( - f'https://puhutv.com/{video_id}-izle', - PuhuTVIE.ie_key(), video_id) + f'https://puhutv.com/{video_id}-izle', PuhuTVIE, video_id)