From 405ec05cb2a1bb1ce27353a831924c17f57b86f4 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 15:25:53 +0500 Subject: [PATCH 1/6] added an IE for wimp.com --- youtube_dl/extractor/__init__.py | 2 ++ youtube_dl/extractor/wimp.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 youtube_dl/extractor/wimp.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0ea9908604..82927610ab 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -49,6 +49,7 @@ from .vbox7 import Vbox7IE from .vevo import VevoIE from .vimeo import VimeoIE from .vine import VineIE +from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE from .xnxx import XNXXIE @@ -132,6 +133,7 @@ def gen_extractors(): VevoIE(), JukeboxIE(), TudouIE(), + WimpIE(), GenericIE() ] diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py new file mode 100644 index 0000000000..9d52c947ea --- /dev/null +++ b/youtube_dl/extractor/wimp.py @@ -0,0 +1,25 @@ +import re +import base64 +from .common import InfoExtractor + + +class WimpIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + title = re.search('\',webpage).group(1) + thumbnail_url = re.search('\',webpage).group(1) + googleString = re.search("googleCode = '(.*?)'", webpage) + googleString = base64.b64decode(googleString.group(1)) + final_url = re.search('","(.*?)"', googleString).group(1) + ext = final_url.split('.')[-1] + return [{ + 'id': video_id, + 'url': final_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail_url, + }] From 6b4642fae32eea550d84333b0631c4afb0f3e8c5 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 15:40:24 +0500 Subject: [PATCH 2/6] added test for wimp.com --- test/tests.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/tests.json b/test/tests.json index 5f4f642e8a..a70ddf34a6 100644 --- a/test/tests.json +++ b/test/tests.json @@ -695,5 +695,14 @@ "info_dict": { "title": "卡马乔国足开大脚长传冲吊集锦" } + }, + { + "name": "Wimp", + "url": "http://www.wimp.com/deerfence/", + "file": "deerfence.flv", + "md5": "8b215e2e0168c6081a1cf84b2846a2b5", + "info_dict": { + "title": "Watch Till End - Herd of deer jump over a fence.", + } } ] From 8bcc355972020086672b0a3d8dcc2f38694f4672 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 15:51:25 +0500 Subject: [PATCH 3/6] removed trailing ',' and corrected the title in test --- test/tests.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tests.json b/test/tests.json index a70ddf34a6..816fabf70e 100644 --- a/test/tests.json +++ b/test/tests.json @@ -702,7 +702,7 @@ "file": "deerfence.flv", "md5": "8b215e2e0168c6081a1cf84b2846a2b5", "info_dict": { - "title": "Watch Till End - Herd of deer jump over a fence.", + "title": "Watch Till End: Herd of deer jump over a fence." } } ] From 5abeaf06506b35e4c0db315e847ce32843742fe2 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 17:26:59 +0500 Subject: [PATCH 4/6] changed wimp.py according to the changes suggested by jaime --- youtube_dl/extractor/wimp.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 9d52c947ea..811b37cc1b 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -10,11 +10,11 @@ class WimpIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group(1) webpage = self._download_webpage(url, video_id) - title = re.search('\',webpage).group(1) - thumbnail_url = re.search('\',webpage).group(1) - googleString = re.search("googleCode = '(.*?)'", webpage) - googleString = base64.b64decode(googleString.group(1)) - final_url = re.search('","(.*?)"', googleString).group(1) + title = self._search_regex('\',webpage, 'video title') + thumbnail_url = self._search_regex('\',webpage,'video thumbnail') + googleString = self._search_regex("googleCode = '(.*?)'", webpage,'file url') + googleString = base64.b64decode(googleString) + final_url = self._search_regex('","(.*?)"', googleString,'final video url') ext = final_url.split('.')[-1] return [{ 'id': video_id, @@ -23,3 +23,4 @@ class WimpIE(InfoExtractor): 'title': title, 'thumbnail': thumbnail_url, }] + From f64e7695a174b597d62a7cd6211d69b5b0f0d0a0 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 18:46:05 +0500 Subject: [PATCH 5/6] added b'' to my regex expression in order to solve the error on python 3 --- youtube_dl/extractor/wimp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 811b37cc1b..9ff5112a6b 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -14,7 +14,7 @@ class WimpIE(InfoExtractor): thumbnail_url = self._search_regex('\',webpage,'video thumbnail') googleString = self._search_regex("googleCode = '(.*?)'", webpage,'file url') googleString = base64.b64decode(googleString) - final_url = self._search_regex('","(.*?)"', googleString,'final video url') + final_url = self._search_regex(b'","(.*?)"', googleString,'final video url') ext = final_url.split('.')[-1] return [{ 'id': video_id, From b1dfdc51b1062f8e6c5a3270ec04fbf18cd5a867 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 19:41:55 +0500 Subject: [PATCH 6/6] added .decode('ascii') --- youtube_dl/extractor/wimp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 9ff5112a6b..a548e0fa06 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -13,8 +13,8 @@ class WimpIE(InfoExtractor): title = self._search_regex('\',webpage, 'video title') thumbnail_url = self._search_regex('\',webpage,'video thumbnail') googleString = self._search_regex("googleCode = '(.*?)'", webpage,'file url') - googleString = base64.b64decode(googleString) - final_url = self._search_regex(b'","(.*?)"', googleString,'final video url') + googleString = base64.b64decode(googleString).decode('ascii') + final_url = self._search_regex('","(.*?)"', googleString,'final video url') ext = final_url.split('.')[-1] return [{ 'id': video_id,