From aefe3abd73683275a20275e02327125be40a256f Mon Sep 17 00:00:00 2001 From: 4ft35t <4ft35t@gmail.com> Date: Sat, 11 Jan 2025 01:50:18 +0800 Subject: [PATCH] [ie/ximalaya:album] fix #11327 --- yt_dlp/extractor/ximalaya.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 02bf6a7beb..1d55b7ae86 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -222,25 +222,37 @@ class XimalayaAlbumIE(XimalayaBaseIE): def _real_extract(self, url): playlist_id = self._match_id(url) - first_page = self._fetch_page(playlist_id, 1) - page_count = math.ceil(first_page['trackTotalCount'] / first_page['pageSize']) + title = self._download_json( + 'https://www.ximalaya.com/revision/album/v1/simple', + playlist_id, note='Downloading album info', query={'albumId': playlist_id})['data']['albumPageMainInfo']['albumTitle'] - entries = InAdvancePagedList( - lambda idx: self._get_entries(self._fetch_page(playlist_id, idx + 1) if idx else first_page), - page_count, first_page['pageSize']) - title = traverse_obj(first_page, ('tracks', 0, 'albumTitle'), expected_type=str) + page_size = 30 + page_idx = 1 + page_cache = {} + while True: + page_data = self._fetch_page(playlist_id, page_idx, page_size) + page_cache[str(page_idx)] = page_data + if len(page_data['tracksAudioPlay']) < page_size: + break + page_idx += 1 + + page_count = page_idx + + entries = InAdvancePagedList( + lambda idx: self._get_entries(page_cache[str(idx+1)]), + page_count, page_size) return self.playlist_result(entries, playlist_id, title) - def _fetch_page(self, playlist_id, page_idx): + def _fetch_page(self, playlist_id, page_idx, page_size=30): return self._download_json( - 'https://www.ximalaya.com/revision/album/v1/getTracksList', + 'https://www.ximalaya.com/revision/play/v1/show', playlist_id, note=f'Downloading tracks list page {page_idx}', - query={'albumId': playlist_id, 'pageNum': page_idx})['data'] + query={'id': playlist_id, 'num': page_idx, 'size': page_size, 'ptype': 0})['data'] def _get_entries(self, page_data): - for e in page_data['tracks']: + for e in page_data['tracksAudioPlay']: yield self.url_result( - self._proto_relative_url(f'//www.ximalaya.com{e["url"]}'), + self._proto_relative_url(f'//www.ximalaya.com{e["trackUrl"]}'), XimalayaIE, e.get('trackId'), e.get('title'))