From cbb127568a6182df2c5a2d65426de523f1f7b43f Mon Sep 17 00:00:00 2001 From: Olivier Bilodeau Date: Thu, 15 Dec 2016 20:14:04 -0500 Subject: [PATCH] [vrak] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/vrak.py | 68 ++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 youtube_dl/extractor/vrak.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b1613a9d35..0ac42138ad 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1165,6 +1165,7 @@ from .voicerepublic import VoiceRepublicIE from .voxmedia import VoxMediaIE from .vporn import VpornIE from .vrt import VRTIE +from .vrak import VrakIE from .vube import VubeIE from .vuclip import VuClipIE from .vvvvid import VVVVIDIE diff --git a/youtube_dl/extractor/vrak.py b/youtube_dl/extractor/vrak.py new file mode 100644 index 0000000000..692e2fcfc9 --- /dev/null +++ b/youtube_dl/extractor/vrak.py @@ -0,0 +1,68 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from .brightcove import BrightcoveNewIE + + +class VrakIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vrak\.tv/videos\?.*?target=(?P[0-9\.]+).*' + _TEST = { + 'url': 'http://www.vrak.tv/videos?target=1.2240923&filtre=emission&id=1.1806721', + 'md5': 'c5d5ce237bca3b1e990ce1b48d1f0948', + 'info_dict': { + 'id': '5231040869001', + 'ext': 'mp4', + 'title': 'Référendums américains, animés japonais et hooligans russes', + 'upload_date': '20161201', + 'description': 'This video file has been uploaded automatically using Oprah. It should be updated with real description soon.', + 'timestamp': 1480628425, + 'uploader_id': '2890187628001', + } + } + + def _real_extract(self, url): + url_id = self._match_id(url) + webpage = self._download_webpage(url, url_id) + + result = {} + result['title'] = self._html_search_regex( + r'

(.+?)

', webpage, 'title') + + # Inspired from BrightcoveNewIE._extract_url() + entries = [] + for account_id, player_id, _, video_id in re.findall( + # account_id, player_id and embed from: + #
]+ + data-publisher-id=["\'](\d+)["\'] + [^>]* + data-player-id=["\']([^"\']+)["\'] + [^>]* + refId":"([^&]+)" + [^>]* + >.*? +
.*? + RW\ java\.lang\.String\ value\ =\ \'brightcove\.article\.\d+\.\3\' + [^>]* + RW\ java\.lang\.String\ value\ =\ \'(\d+)\' + ''', webpage): + + entries.append( + 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' + % (account_id, player_id, 'default', video_id)) + + if entries: + result = self.url_result(entries[0], BrightcoveNewIE.ie_key()) + + return result