Update _real_extract in BandcampWeeklyIE class to handle the KeyError

1 week ago · 05285b7e0d
parent e8d49b1c7f
commit 05285b7e0d
1 changed file with 67 additions and 23 deletions
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@@ -440,39 +440,83 @@ class BandcampWeeklyIE(BandcampIE):  # XXX: Do not subclass from concrete IE
        blob = self._extract_data_attr(webpage, show_id, 'blob')
-        show = blob['bcw_data'][show_id]
+        # Updated to correctly navigate the new data structure
        # The data is now in a list under appData['shows']
        shows_list = try_get(blob, lambda x: x['appData']['shows'], list)
        show = None
        if shows_list:
            for s in shows_list:
                if str(s.get('showId')) == show_id:
                    show = s
                    break
        if not show:
            # Fallback to the original logic if the new path fails
            show = try_get(blob, lambda x: x['bcw_data'][show_id], dict)
        if not show:
            raise ExtractorError('Bandcamp Weekly data not found. This extractor is outdated. Please report this issue.')
        formats = []
-        for format_id, format_url in show['audio_stream'].items():
+        # The audio track ID is now in the 'audioTrackId' key
-            if not url_or_none(format_url):
+        audio_track_id = str_or_none(show.get('audioTrackId'))
-                continue
+
-            for known_ext in KNOWN_EXTENSIONS:
+        # If audio track ID is found, download the audio page to get formats
-                if known_ext in format_id:
+        if audio_track_id:
-                    ext = known_ext
+            track_url = f'https://bandcamp.com/download?id={audio_track_id}'
-                    break
+            audio_page = self._download_webpage(
-            else:
+                track_url, show_id, 'Downloading audio download page')
-                ext = None
+            
-            formats.append({
+            # The download links are on the new page, so we need a new way to parse
-                'format_id': format_id,
+            audio_blob = self._extract_data_attr(audio_page, show_id, 'blob', fatal=False)
-                'url': format_url,
+            if audio_blob:
-                'ext': ext,
+                # The formats are now in the 'downloads' dict of the first entry in audio_blob['digital_items']
-                'vcodec': 'none',
+                downloads = try_get(audio_blob, lambda x: x['digital_items'][0]['downloads'], dict)
-            })
+                if downloads:
-
+                    for format_id, f in downloads.items():
-        title = show.get('audio_title') or 'Bandcamp Weekly'
+                        formats.append({
-        subtitle = show.get('subtitle')
+                            'url': f.get('url'),
                            'format_id': format_id,
                            'ext': f.get('encoding_name'),
                            'vcodec': 'none',
                        })
        # Fallback to the old logic if new parsing fails
        if not formats and show.get('audio_stream'):
            for format_id, format_url in show['audio_stream'].items():
                if not url_or_none(format_url):
                    continue
                for known_ext in KNOWN_EXTENSIONS:
                    if known_ext in format_id:
                        ext = known_ext
                        break
                else:
                    ext = None
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'ext': ext,
                    'vcodec': 'none',
                })
        # If no formats were found after all attempts, raise an error
        if not formats:
            raise ExtractorError('Could not find any audio formats for this episode.')
        title = show.get('audio_title') or show.get('title') or 'Bandcamp Weekly'
        subtitle = show.get('shortDesc')
        if subtitle:
            title += f' - {subtitle}'
-
+            
        return {
            'id': show_id,
            'title': title,
-            'description': show.get('desc') or show.get('short_desc'),
+            'description': show.get('desc') or show.get('shortDesc'),
            'duration': float_or_none(show.get('audio_duration')),
            'is_live': False,
-            'release_date': unified_strdate(show.get('published_date')),
+            'release_date': unified_strdate(show.get('date')),
            'series': 'Bandcamp Weekly',
-            'episode': show.get('subtitle'),
+            'episode': show.get('shortDesc'),
            'episode_id': show_id,
            'formats': formats,
        }