NOTE(review): line breaks and leading whitespace in this patch were reconstructed
from a whitespace-collapsed copy; verify the context lines against the target
files before applying. The index hashes predate the edits to the SteamIE hunk.

diff --git a/test/tests.json b/test/tests.json
index c2de6099d2..61914e8686 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -120,5 +120,14 @@
       "upload_date": "20091225",
       "description": "Installing Gentoo Linux on Powerbook G4, it turns out the sleep indicator becomes HDD activity indicator :D"
     }
+  },
+  {
+    "name": "Steam",
+    "url": "http://store.steampowered.com/video/105600/",
+    "file": "81300.flv",
+    "md5": "f870007cee7065d7c76b88f0a45ecc07",
+    "info_dict": {
+        "title": "Terraria 1.1 Trailer"
+    }
   }
 ]
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index d74751a55b..d7295ae3fe 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -3756,3 +3756,82 @@ class TweetReelIE(InfoExtractor):
             'upload_date': upload_date
         }
         return [info]
+
+class SteamIE(InfoExtractor):
+    """Information extractor for videos on store.steampowered.com.
+
+    Accepts /video/<gameID>/ and /app/<gameID>/ URLs; both are resolved
+    through the /video/<gameID>/ page, and one info dict is returned for
+    every movie entry found on it.
+    """
+
+    # NOTE(review): group names were reconstructed from the .group() calls
+    # in this class and from the inline comments; 'extra' names a group that
+    # nothing here reads -- confirm against upstream.
+    _VALID_URL = r"""http://store\.steampowered\.com/
+                (?P<urltype>video|app)/ #If the page is only for videos or for a game
+                (?P<gameID>\d+)/?
+                (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
+                """
+    IE_NAME = u'Steam'
+
+    # Matches entries of the form
+    #   'movie_<id>': { FILENAME: "<url>"[, MOVIE_NAME: "<name>"] },
+    _MOVIE_RE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
+
+    # NOTE(review): the markup around the named group was missing from the
+    # copy of this patch under review; the <span class="title"> wrapper is
+    # a reconstruction -- confirm against upstream.
+    _TITLE_RE = r'<span class=\"title\">(?P<videoName>[\w:/\.\?=\+\s-]+)</span>'
+
+    def suitable(self, url):
+        """Receives a URL and returns True if suitable for this IE."""
+        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+
+    def report_download_video_page(self, game_id):
+        """Report that the video page for game_id is being downloaded."""
+        self._downloader.to_screen(u'[%s] %s: Downloading video page' % (self.IE_NAME, game_id))
+
+    def _real_extract(self, url):
+        """Return a list of info dicts, one per movie on the video page.
+
+        Reports the problem through the downloader and returns None when
+        the URL does not match or the page cannot be downloaded.
+        """
+        m = re.match(self._VALID_URL, url, re.VERBOSE)
+        if m is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+        gameID = m.group('gameID')
+        videourl = 'http://store.steampowered.com/video/%s/' % gameID
+        try:
+            self.report_download_video_page(gameID)
+            urlh = compat_urllib_request.urlopen(videourl)
+            try:
+                webpage_bytes = urlh.read()
+            finally:
+                # Close the response even if the read fails.
+                urlh.close()
+            webpage = webpage_bytes.decode('utf-8', 'ignore')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+            return
+        # Titles are paired with movie entries by their order on the page.
+        titles = [t.group('videoName') for t in re.finditer(self._TITLE_RE, webpage)]
+        videos = []
+        for i, vid in enumerate(re.finditer(self._MOVIE_RE, webpage)):
+            video_id = vid.group('videoID')
+            video_url = vid.group('videoURL')
+            if not video_url:
+                self._downloader.trouble(u'ERROR: Cannot find video url for %s' % video_id)
+                continue
+            # Fall back to the id instead of raising IndexError when the page
+            # yields fewer titles than movie entries.
+            title = titles[i] if i < len(titles) else video_id
+            videos.append({
+                'id': video_id,
+                'url': video_url,
+                'ext': 'flv',
+                'title': title,
+            })
+        return videos
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 8068810ca1..62ecdf6b6c 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -313,6 +313,7 @@ def gen_extractors():
         JustinTVIE(),
         FunnyOrDieIE(),
         TweetReelIE(),
+        SteamIE(),
         GenericIE()
     ]
 