diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py
index 0c969d774f..35c3d275f8 100644
--- a/yt_dlp/extractor/steam.py
+++ b/yt_dlp/extractor/steam.py
@@ -1,11 +1,19 @@
+import json
import re
from .common import InfoExtractor
from ..utils import (
ExtractorError,
+ clean_html,
extract_attributes,
- get_element_by_class,
str_or_none,
+ url_or_none,
+)
+from ..utils.traversal import (
+ find_element,
+ find_elements,
+ traverse_obj,
+ trim_str,
)
@@ -23,40 +31,18 @@ class SteamIE(InfoExtractor):
_AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
_TESTS = [{
'url': 'http://store.steampowered.com/video/105600/',
- 'playlist': [
- {
- 'md5': 'e800bd0baf47286d8b434d07b357247e',
- 'info_dict': {
- 'id': '256785003',
- 'ext': 'mp4',
- 'title': 'Terraria video 256785003',
- 'thumbnail': r're:^https?://[^/]*steamstatic\.com',
- },
- },
- {
- 'md5': '9612ee058ebd645371c5ddd69adb310f',
- 'info_dict': {
- 'id': '2040428',
- 'ext': 'mp4',
- 'title': 'Terraria video 2040428',
- 'thumbnail': r're:^https?://[^/]*steamstatic\.com',
- },
- },
- ],
'info_dict': {
'id': '105600',
'title': 'Terraria',
},
- 'params': {
- 'playlistend': 2,
- },
+ 'playlist_mincount': 3,
}, {
'url': 'https://store.steampowered.com/app/271590/Grand_Theft_Auto_V/',
'info_dict': {
'id': '271590',
- 'title': 'Grand Theft Auto V',
+ 'title': 'Grand Theft Auto V Legacy',
},
- 'playlist_count': 23,
+ 'playlist_mincount': 26,
}]
def _real_extract(self, url):
@@ -81,36 +67,31 @@ class SteamIE(InfoExtractor):
self.report_age_confirmation()
webpage = self._download_webpage(video_url, playlist_id)
- videos = re.findall(r'(<div[^>]+id=[\'"]highlight_movie_(\d+)[\'"][^>]+>)', webpage)
+ app_name = traverse_obj(webpage, ({find_element(cls='apphub_AppName')}, {clean_html}))
entries = []
- playlist_title = get_element_by_class('apphub_AppName', webpage)
- for movie, movie_id in videos:
- if not movie:
- continue
- movie = extract_attributes(movie)
- if not movie_id:
- continue
- entry = {
- 'id': movie_id,
- 'title': f'{playlist_title} video {movie_id}',
- }
+ for data_prop in traverse_obj(webpage, (
+ {find_elements(cls='highlight_player_item highlight_movie', html=True)},
+ ..., {extract_attributes}, 'data-props', {json.loads}, {dict},
+ )):
formats = []
- if movie:
- data = self._parse_json(movie['data-props'], video_id=movie_id)
-
- entry['thumbnail'] = data.get('screenshot')
-
- for dash_url in data.get('dashManifests', []):
- formats.extend(self._extract_mpd_formats(
- dash_url, movie_id, mpd_id='dash', fatal=False))
-
- hls_url = data.get('hlsManifest')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, movie_id, entry_protocol='m3u8', m3u8_id='hls', fatal=False))
+ if hls_manifest := traverse_obj(data_prop, ('hlsManifest', {url_or_none})):
+ formats.extend(self._extract_m3u8_formats(
+ hls_manifest, playlist_id, 'mp4', m3u8_id='hls', fatal=False))
+
+ for dash_manifest in traverse_obj(data_prop, (
+ 'dashManifests', ..., {url_or_none}, filter, all, filter,
+ )):
+ formats.extend(self._extract_mpd_formats(
+ dash_manifest, playlist_id, mpd_id='dash', fatal=False))
+
+ movie_id = traverse_obj(data_prop, ('id', {trim_str(start='highlight_movie_')}))
+ entries.append({
+ 'id': movie_id,
+ 'title': f'{app_name} video {movie_id}',
+ 'formats': formats,
+ 'thumbnail': traverse_obj(data_prop, ('screenshot', {url_or_none})),
+ })
- entry['formats'] = formats
- entries.append(entry)
embedded_videos = re.findall(r'(