From aefe3abd73683275a20275e02327125be40a256f Mon Sep 17 00:00:00 2001 From: 4ft35t <4ft35t@gmail.com> Date: Sat, 11 Jan 2025 01:50:18 +0800 Subject: [PATCH 1/4] [ie/ximalaya:album] fix #11327 --- yt_dlp/extractor/ximalaya.py | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 02bf6a7beb..1d55b7ae86 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -222,25 +222,37 @@ class XimalayaAlbumIE(XimalayaBaseIE): def _real_extract(self, url): playlist_id = self._match_id(url) - first_page = self._fetch_page(playlist_id, 1) - page_count = math.ceil(first_page['trackTotalCount'] / first_page['pageSize']) + title = self._download_json( + 'https://www.ximalaya.com/revision/album/v1/simple', + playlist_id, note='Downloading album info', query={'albumId': playlist_id})['data']['albumPageMainInfo']['albumTitle'] - entries = InAdvancePagedList( - lambda idx: self._get_entries(self._fetch_page(playlist_id, idx + 1) if idx else first_page), - page_count, first_page['pageSize']) - title = traverse_obj(first_page, ('tracks', 0, 'albumTitle'), expected_type=str) + page_size = 30 + page_idx = 1 + page_cache = {} + while True: + page_data = self._fetch_page(playlist_id, page_idx, page_size) + page_cache[str(page_idx)] = page_data + if len(page_data['tracksAudioPlay']) < page_size: + break + page_idx += 1 + + page_count = page_idx + + entries = InAdvancePagedList( + lambda idx: self._get_entries(page_cache[str(idx+1)]), + page_count, page_size) return self.playlist_result(entries, playlist_id, title) - def _fetch_page(self, playlist_id, page_idx): + def _fetch_page(self, playlist_id, page_idx, page_size=30): return self._download_json( - 'https://www.ximalaya.com/revision/album/v1/getTracksList', + 'https://www.ximalaya.com/revision/play/v1/show', playlist_id, note=f'Downloading tracks list page {page_idx}', - query={'albumId': playlist_id, 'pageNum': page_idx})['data'] + query={'id': playlist_id, 'num': page_idx, 'size': page_size, 'ptype': 0})['data'] def _get_entries(self, page_data): - for e in page_data['tracks']: + for e in page_data['tracksAudioPlay']: yield self.url_result( - self._proto_relative_url(f'//www.ximalaya.com{e["url"]}'), + self._proto_relative_url(f'//www.ximalaya.com{e["trackUrl"]}'), XimalayaIE, e.get('trackId'), e.get('title')) From 69ab3e8ca5b4860ce830b6b1e20ee354c7bb39aa Mon Sep 17 00:00:00 2001 From: 4ft35t <4ft35t@gmail.com> Date: Sat, 11 Jan 2025 02:15:24 +0800 Subject: [PATCH 2/4] update coding style --- yt_dlp/extractor/ximalaya.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 1d55b7ae86..34c4b2a13b 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -222,10 +222,9 @@ class XimalayaAlbumIE(XimalayaBaseIE): def _real_extract(self, url): playlist_id = self._match_id(url) - title = self._download_json( - 'https://www.ximalaya.com/revision/album/v1/simple', - playlist_id, note='Downloading album info', query={'albumId': playlist_id})['data']['albumPageMainInfo']['albumTitle'] - + meta = self._download_json('https://www.ximalaya.com/revision/album/v1/simple', + playlist_id, note='Downloading album info', query={'albumId': playlist_id}) + title = traverse_obj(meta, ('data', 'albumPageMainInfo', 'albumTitle')) page_size = 30 page_idx = 1 @@ -233,26 +232,27 @@ class XimalayaAlbumIE(XimalayaBaseIE): while True: page_data = self._fetch_page(playlist_id, page_idx, page_size) page_cache[str(page_idx)] = page_data - if len(page_data['tracksAudioPlay']) < page_size: + if len(page_data) < page_size: break page_idx += 1 page_count = page_idx entries = InAdvancePagedList( - lambda idx: self._get_entries(page_cache[str(idx+1)]), + lambda idx: self._get_entries(page_cache.get(str(idx+1))), page_count, page_size) return self.playlist_result(entries, playlist_id, title) def _fetch_page(self, playlist_id, page_idx, page_size=30): - return self._download_json( + meta = self._download_json( 'https://www.ximalaya.com/revision/play/v1/show', playlist_id, note=f'Downloading tracks list page {page_idx}', - query={'id': playlist_id, 'num': page_idx, 'size': page_size, 'ptype': 0})['data'] + query={'id': playlist_id, 'num': page_idx, 'size': page_size, 'ptype': 0}) + return traverse_obj(meta, ('data', 'tracksAudioPlay')) def _get_entries(self, page_data): - for e in page_data['tracksAudioPlay']: + for e in page_data: yield self.url_result( - self._proto_relative_url(f'//www.ximalaya.com{e["trackUrl"]}'), + self._proto_relative_url(f'//www.ximalaya.com{e.get("trackUrl")}'), XimalayaIE, e.get('trackId'), e.get('title')) From f186a2a750bd23dc32cb95790685b26390780e25 Mon Sep 17 00:00:00 2001 From: 4ft35t <4ft35t@gmail.com> Date: Mon, 13 Jan 2025 09:48:24 +0800 Subject: [PATCH 3/4] clean up --- yt_dlp/extractor/ximalaya.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 34c4b2a13b..22c53d43b3 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -1,5 +1,4 @@ import base64 -import math import time from .common import InfoExtractor From 66530018e8caecedd67f1a7e7430df030a8c9894 Mon Sep 17 00:00:00 2001 From: 4ft35t <4ft35t@gmail.com> Date: Tue, 14 Jan 2025 16:46:17 +0800 Subject: [PATCH 4/4] autopep8 format --- yt_dlp/extractor/ximalaya.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 22c53d43b3..fb7139216f 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -222,7 +222,7 @@ class XimalayaAlbumIE(XimalayaBaseIE): playlist_id = self._match_id(url) meta = self._download_json('https://www.ximalaya.com/revision/album/v1/simple', - playlist_id, note='Downloading album info', query={'albumId': playlist_id}) + playlist_id, note='Downloading album info', query={'albumId': playlist_id}) title = traverse_obj(meta, ('data', 'albumPageMainInfo', 'albumTitle')) page_size = 30 @@ -238,7 +238,7 @@ class XimalayaAlbumIE(XimalayaBaseIE): page_count = page_idx entries = InAdvancePagedList( - lambda idx: self._get_entries(page_cache.get(str(idx+1))), + lambda idx: self._get_entries(page_cache.get(str(idx + 1))), page_count, page_size) return self.playlist_result(entries, playlist_id, title)