From 78d3442b1209d3858cfea1f7ca958f661784b5ab Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Sun, 9 Jun 2013 14:21:42 +0200 Subject: [PATCH] test: extend the reach of info_dict checking * print the info_dict in a format suitable for easy addition to tests.json during tests if un-tested fields are detected * make it possible to put the crc32 in tests.json if the field is too long * complete the "info_dict" fields in existing tests * fixed the bugs caught while doing this --- test/test_download.py | 21 +++- test/tests.json | 185 ++++++++++++++++++++++++++++------- youtube_dl/InfoExtractors.py | 17 ++-- 3 files changed, 177 insertions(+), 46 deletions(-) diff --git a/test/test_download.py b/test/test_download.py index 565b1ebc5..862152033 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -7,8 +7,8 @@ import os import json import unittest import sys -import hashlib import socket +import binascii # Allow direct execution sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -38,6 +38,9 @@ def _try_rm(filename): if ose.errno != errno.ENOENT: raise +def crc32(value): + return '%08x' % (binascii.crc32(value.encode('utf8')) & 0xffffffff) + class FileDownloader(youtube_dl.FileDownloader): def __init__(self, *args, **kwargs): self.to_stderr = self.to_screen @@ -124,7 +127,21 @@ def generator(test_case): with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof: info_dict = json.load(infof) for (info_field, value) in tc.get('info_dict', {}).items(): - self.assertEqual(value, info_dict.get(info_field)) + if isinstance(value, compat_str) and value.startswith('crc32:'): + self.assertEqual(value, 'crc32:' + crc32(info_dict.get(info_field))) + else: + self.assertEqual(value, info_dict.get(info_field)) + + # If checkable fields are missing from the test case, print the info_dict + test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'crc32:' + crc32(value)) + for key, value in 
info_dict.items() + if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')) + if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()): + sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=2) + u'\n') + + # Check for the presence of mandatory fields + for key in ('id', 'url', 'title', 'ext'): + self.assertTrue(key in info_dict.keys() and info_dict[key]) finally: for tc in test_cases: _try_rm(tc['file']) diff --git a/test/tests.json b/test/tests.json index 82da27d5b..e9abb0950 100644 --- a/test/tests.json +++ b/test/tests.json @@ -15,43 +15,76 @@ "name": "Dailymotion", "md5": "392c4b85a60a90dc4792da41ce3144eb", "url": "http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech", - "file": "x33vw9.mp4" + "file": "x33vw9.mp4", + "info_dict": { + "uploader": "Alex and Van .", + "title": "Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\"" + } }, { "name": "Metacafe", "add_ie": ["Youtube"], "url": "http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", - "file": "_aUehQsCQtM.flv" + "file": "_aUehQsCQtM.flv", + "info_dict": { + "upload_date": "20090102", + "title": "The Electric Company | \"Short I\" | PBS KIDS GO!", + "description": "crc32:5ef3bc57", + "uploader": "PBS", + "uploader_id": "PBS" + } }, { "name": "BlipTV", "md5": "b2d849efcf7ee18917e4b4d9ff37cafe", "url": "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352", - "file": "5779306.m4v" + "file": "5779306.m4v", + "info_dict": { + "upload_date": "20111205", + "description": "crc32:fa658d49", + "uploader": "Comic Book Resources - CBR TV", + "title": "CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3" + } }, { "name": "XVideos", "md5": "1d0c835822f0a71a7bf011855db929d0", "url": "http://www.xvideos.com/video939581/funny_porns_by_s_-1", - "file": "939581.flv" + "file": "939581.flv", + "info_dict": { + "title": 
"Funny Porns By >>>>S<<<<<< -1" + } }, { "name": "YouPorn", "md5": "c37ddbaaa39058c76a7e86c6813423c1", "url": "http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/", - "file": "505835.mp4" + "file": "505835.mp4", + "info_dict": { + "upload_date": "20101221", + "description": "Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?", + "uploader": "Ask Dan And Jennifer", + "title": "Sex Ed: Is It Safe To Masturbate Daily?" + } }, { "name": "Pornotube", "md5": "374dd6dcedd24234453b295209aa69b6", "url": "http://pornotube.com/c/173/m/1689755/Marilyn-Monroe-Bathing", - "file": "1689755.flv" + "file": "1689755.flv", + "info_dict": { + "upload_date": "20090708", + "title": "Marilyn-Monroe-Bathing" + } }, { "name": "YouJizz", "md5": "07e15fa469ba384c7693fd246905547c", "url": "http://www.youjizz.com/videos/zeichentrick-1-2189178.html", - "file": "2189178.flv" + "file": "2189178.flv", + "info_dict": { + "title": "Zeichentrick 1" + } }, { "name": "Vimeo", @@ -70,61 +103,103 @@ "name": "Soundcloud", "md5": "ebef0a451b909710ed1d7787dddbf0d7", "url": "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy", - "file": "62986583.mp3" + "file": "62986583.mp3", + "info_dict": { + "upload_date": "20121011", + "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", + "uploader": "E.T. 
ExTerrestrial Music", + "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" + } }, { "name": "StanfordOpenClassroom", "md5": "544a9468546059d4e80d76265b0443b8", "url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100", - "file": "PracticalUnix_intro-environment.mp4" + "file": "PracticalUnix_intro-environment.mp4", + "info_dict": { + "title": "Intro Environment" + } }, { "name": "XNXX", "md5": "0831677e2b4761795f68d417e0b7b445", "url": "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_", - "file": "1135332.flv" + "file": "1135332.flv", + "info_dict": { + "title": "lida » Naked Funny Actress (5)" + } }, { "name": "Youku", "url": "http://v.youku.com/v_show/id_XNDgyMDQ2NTQw.html", "file": "XNDgyMDQ2NTQw_part00.flv", "md5": "ffe3f2e435663dc2d1eea34faeff5b5b", - "params": { "test": false } + "params": { "test": false }, + "info_dict": { + "title": "youtube-dl test video \"'/\\ä↭𝕐" + } }, { "name": "NBA", "url": "http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html", "file": "0021200253-okc-bkn-recap.nba.mp4", - "md5": "c0edcfc37607344e2ff8f13c378c88a4" + "md5": "c0edcfc37607344e2ff8f13c378c88a4", + "info_dict": { + "description": "Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.", + "title": "Thunder vs. 
Nets" + } }, { "name": "JustinTV", "url": "http://www.twitch.tv/thegamedevhub/b/296128360", "file": "296128360.flv", - "md5": "ecaa8a790c22a40770901460af191c9a" + "md5": "ecaa8a790c22a40770901460af191c9a", + "info_dict": { + "upload_date": "20110927", + "uploader_id": 25114803, + "uploader": "thegamedevhub", + "title": "Beginner Series - Scripting With Python Pt.1" + } }, { "name": "MyVideo", "url": "http://www.myvideo.de/watch/8229274/bowling_fail_or_win", "file": "8229274.flv", - "md5": "2d2753e8130479ba2cb7e0a37002053e" + "md5": "2d2753e8130479ba2cb7e0a37002053e", + "info_dict": { + "title": "bowling-fail-or-win" + } }, { "name": "Escapist", "url": "http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate", "file": "6618-Breaking-Down-Baldurs-Gate.mp4", - "md5": "c6793dbda81388f4264c1ba18684a74d" + "md5": "c6793dbda81388f4264c1ba18684a74d", + "info_dict": { + "description": "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", + "uploader": "the-escapist-presents", + "title": "Breaking Down Baldur's Gate" + } }, { "name": "GooglePlus", "url": "https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH", - "file": "ZButuJc6CtH.flv" + "file": "ZButuJc6CtH.flv", + "info_dict": { + "upload_date": "20120613", + "uploader": "井上ヨシマサ", + "title": "嘆きの天使 降臨" + } }, { "name": "FunnyOrDie", "url": "http://www.funnyordie.com/videos/0732f586d7/heart-shaped-box-literal-video-version", "file": "0732f586d7.mp4", - "md5": "f647e9e90064b53b6e046e75d0241fbd" + "md5": "f647e9e90064b53b6e046e75d0241fbd", + "info_dict": { + "description": "Lyrics changed to match the video. Spoken cameo by Obscurus Lupa (from ThatGuyWithTheGlasses.com). Based on a concept by Dustin McLean (DustFilms.com). Performed, edited, and written by David A. 
Scott.", + "title": "Heart-Shaped Box: Literal Video Version" + } }, { "name": "Steam", @@ -161,6 +236,7 @@ "url": "http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things", "file": "12-jan-pythonthings.mp4", "info_dict": { + "description": "Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.", "title": "A Few of My Favorite [Python] Things" }, "params": { @@ -173,7 +249,10 @@ "file": "422212.mp4", "md5": "4e2f5cb088a83cd8cdb7756132f9739d", "info_dict": { - "title": "thedailyshow-kristen-stewart part 1" + "upload_date": "20121214", + "description": "Kristen Stewart", + "uploader": "thedailyshow", + "title": "thedailyshow-kristen-stewart part 1" } }, { @@ -224,42 +303,48 @@ "file": "11885679.m4a", "md5": "d30b5b5f74217410f4689605c35d1fd7", "info_dict": { - "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad" + "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" } }, { "file": "11885680.m4a", "md5": "4eb0a669317cd725f6bbd336a29f923a", "info_dict": { - "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad" + "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" } }, { "file": "11885682.m4a", "md5": "1893e872e263a2705558d1d319ad19e8", "info_dict": { - "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad" + "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" } }, { "file": "11885683.m4a", "md5": "b673c46f47a216ab1741ae8836af5899", "info_dict": { - "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad" + "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" } }, { "file": "11885684.m4a", "md5": "1d74534e95df54986da7f5abf7d842b7", "info_dict": { - "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad" + "title": "phihag - 
youtube-dl test track 7 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" } }, { "file": "11885685.m4a", "md5": "f081f47af8f6ae782ed131d38b9cd1c0", "info_dict": { - "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad" + "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad", + "uploader_id": "ytdl" } } ] @@ -270,9 +355,9 @@ "file": "NODfbab.mp4", "md5": "9b0636f8c0f7614afa4ea5e4c6e57e83", "info_dict": { + "uploader": "ytdl", "title": "test chars: \"'/\\ä<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." } - }, { "name": "TED", @@ -290,14 +375,19 @@ "file": "11741.mp4", "md5": "0b49f4844a068f8b33f4b7c88405862b", "info_dict": { - "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" + "description": "Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?", + "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" } }, { "name": "Generic", "url": "http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html", "file": "13601338388002.mp4", - "md5": "85b90ccc9d73b4acd9138d3af4c27f89" + "md5": "85b90ccc9d73b4acd9138d3af4c27f89", + "info_dict": { + "uploader": "www.hodiho.fr", + "title": "Régis plante sa Jeep" + } }, { "name": "Spiegel", @@ -355,42 +445,59 @@ "file":"30510138.mp3", "md5":"f9136bf103901728f29e419d2c70f55d", "info_dict": { - "title":"D-D-Dance" + "upload_date": "20111213", + "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + "uploader": "The Royal Concept", + "title": "D-D-Dance" } }, { "file":"47127625.mp3", "md5":"09b6758a018470570f8fd423c9453dd8", "info_dict": { - "title":"The Royal Concept - Gimme Twice" + "upload_date": "20120521", + "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + 
"uploader": "The Royal Concept", + "title": "The Royal Concept - Gimme Twice" } }, { "file":"47127627.mp3", "md5":"154abd4e418cea19c3b901f1e1306d9c", "info_dict": { - "title":"Goldrushed" + "upload_date": "20120521", + "uploader": "The Royal Concept", + "title": "Goldrushed" } }, { "file":"47127629.mp3", "md5":"2f5471edc79ad3f33a683153e96a79c1", "info_dict": { - "title":"In the End" + "upload_date": "20120521", + "description": "The Royal Concept from Stockholm\r\nFilip / Povel / David / Magnus\r\nwww.royalconceptband.com", + "uploader": "The Royal Concept", + "title": "In the End" } }, { "file":"47127631.mp3", "md5":"f9ba87aa940af7213f98949254f1c6e2", "info_dict": { - "title":"Knocked Up" + "upload_date": "20120521", + "description": "The Royal Concept from Stockholm\r\nFilip / David / Povel / Magnus\r\nwww.theroyalconceptband.com", + "uploader": "The Royal Concept", + "title": "Knocked Up" } }, { "file":"75206121.mp3", "md5":"f9d1fe9406717e302980c30de4af9353", "info_dict": { - "title":"World On Fire" + "upload_date": "20130116", + "description": "The unreleased track World on Fire premiered on the CW's hit show Arrow (8pm/7pm central). \r\nAs a gift to our fans we would like to offer you a free download of the track! ", + "uploader": "The Royal Concept", + "title": "World On Fire" } } ] @@ -419,8 +526,10 @@ "url": "http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0", "file": "zpsc0c3b9fa.mp4", "md5": "7dabfb92b0a31f6c16cebc0f8e60ff99", - "info_dict":{ - "title":"Tired of Link Building? Try BacklinkMyDomain.com!" + "info_dict": { + "upload_date": "20130504", + "uploader": "rachaneronas", + "title": "Tired of Link Building? Try BacklinkMyDomain.com!" 
} }, { @@ -488,8 +597,10 @@ "url": "http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html", "file": "1509445.flv", "md5": "9f48e0e8d58e3076bb236ff412ab62fa", - "info_dict":{ - "title":"FemaleAgent Shy beauty takes the bait" + "info_dict": { + "upload_date": "20121014", + "uploader_id": "Ruseful2011", + "title": "FemaleAgent Shy beauty takes the bait" } }, { diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 6060a5988..24e9c4cc7 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2377,8 +2377,8 @@ class EscapistIE(InfoExtractor): showName = mobj.group('showname') videoId = mobj.group('episode') - self.report_extraction(showName) - webpage = self._download_webpage(url, showName) + self.report_extraction(videoId) + webpage = self._download_webpage(url, videoId) videoDesc = self._html_search_regex(']+>(?P[^>]+)', + video_uploader_id = self._html_search_regex(r']+>(?P[^<]+)', webpage, u'uploader id', default=u'anonymous') video_thumbnail = self._search_regex(r'\'image\':\'(?P[^\']+)\'',