Update to ytdl-commit-dfbbe29

[redbulltv] fix embed data extraction
dfbbe2902f
pull/331/head
pukkandan 4 years ago
parent 5014558ab9
commit b73612a254
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698

@ -6,7 +6,7 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode from ..compat import compat_urllib_parse_urlencode
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
unescapeHTML merge_dicts,
) )
@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor):
'title': 'sexy babe softcore', 'title': 'sexy babe softcore',
'thumbnail': r're:https?://.*\.jpg', 'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18, 'age_limit': 18,
} },
'skip': 'Video not found',
}, { }, {
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file', 'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
'md5': '1baa9602ede46ce904c431f5418d8916', 'md5': '1baa9602ede46ce904c431f5418d8916',
@ -77,19 +78,15 @@ class EroProfileIE(InfoExtractor):
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None) webpage, 'video id', default=None)
video_url = unescapeHTML(self._search_regex(
r'<source src="([^"]+)', webpage, 'video url'))
title = self._html_search_regex( title = self._html_search_regex(
r'Title:</th><td>([^<]+)</td>', webpage, 'title') (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
thumbnail = self._search_regex( webpage, 'title')
r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
webpage, 'thumbnail', fatal=False) info = self._parse_html5_media_entries(url, webpage, video_id)[0]
return { return merge_dicts(info, {
'id': video_id, 'id': video_id,
'display_id': display_id, 'display_id': display_id,
'url': video_url,
'title': title, 'title': title,
'thumbnail': thumbnail,
'age_limit': 18, 'age_limit': 18,
} })

@ -985,6 +985,7 @@ from .platzi import (
from .playfm import PlayFMIE from .playfm import PlayFMIE
from .playplustv import PlayPlusTVIE from .playplustv import PlayPlusTVIE
from .plays import PlaysTVIE from .plays import PlaysTVIE
from .playstuff import PlayStuffIE
from .playtvak import PlaytvakIE from .playtvak import PlaytvakIE
from .playvid import PlayvidIE from .playvid import PlayvidIE
from .playwire import PlaywireIE from .playwire import PlaywireIE

@ -126,6 +126,7 @@ from .viqeo import ViqeoIE
from .expressen import ExpressenIE from .expressen import ExpressenIE
from .zype import ZypeIE from .zype import ZypeIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .vk import VKIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .gedidigital import GediDigitalIE from .gedidigital import GediDigitalIE
from .rcs import RCSEmbedsIE from .rcs import RCSEmbedsIE
@ -2252,6 +2253,10 @@ class GenericIE(InfoExtractor):
'playlist_mincount': 52, 'playlist_mincount': 52,
}, },
{ {
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
'only_matching': True,
}, {
# WimTv embed player # WimTv embed player
'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/', 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/',
'info_dict': { 'info_dict': {
@ -2803,6 +2808,11 @@ class GenericIE(InfoExtractor):
if odnoklassniki_url: if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
# Look for sibnet embedded player
sibnet_urls = VKIE._extract_sibnet_urls(webpage)
if sibnet_urls:
return self.playlist_from_matches(sibnet_urls, video_id, video_title)
# Look for embedded ivi player # Look for embedded ivi player
mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
if mobj is not None: if mobj is not None:
@ -3454,6 +3464,9 @@ class GenericIE(InfoExtractor):
'url': src, 'url': src,
'ext': (mimetype2ext(src_type) 'ext': (mimetype2ext(src_type)
or ext if ext in KNOWN_EXTENSIONS else 'mp4'), or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
'http_headers': {
'Referer': full_response.geturl(),
},
}) })
if formats: if formats:
self._sort_formats(formats) self._sort_formats(formats)
@ -3522,7 +3535,7 @@ class GenericIE(InfoExtractor):
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage) m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player: # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None: if m_video_type is not None:
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)) found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
if not found: if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search( found = re.search(

@ -182,7 +182,7 @@ class ORFRadioIE(InfoExtractor):
duration = end - start if end and start else None duration = end - start if end and start else None
entries.append({ entries.append({
'id': loop_stream_id.replace('.mp3', ''), 'id': loop_stream_id.replace('.mp3', ''),
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id), 'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
'title': title, 'title': title,
'description': clean_html(data.get('subtitle')), 'description': clean_html(data.get('subtitle')),
'duration': duration, 'duration': duration,

@ -9,8 +9,9 @@ from ..compat import compat_str
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
merge_dicts, merge_dicts,
try_get,
unified_timestamp, unified_timestamp,
xpath_text, urljoin,
) )
@ -27,10 +28,11 @@ class PhoenixIE(ZDFBaseIE):
'title': 'Wohin führt der Protest in der Pandemie?', 'title': 'Wohin führt der Protest in der Pandemie?',
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
'duration': 1691, 'duration': 1691,
'timestamp': 1613906100, 'timestamp': 1613902500,
'upload_date': '20210221', 'upload_date': '20210221',
'uploader': 'Phoenix', 'uploader': 'Phoenix',
'channel': 'corona nachgehakt', 'series': 'corona nachgehakt',
'episode': 'Wohin führt der Protest in der Pandemie?',
}, },
}, { }, {
# Youtube embed # Youtube embed
@ -79,36 +81,39 @@ class PhoenixIE(ZDFBaseIE):
video_id = compat_str(video.get('basename') or video.get('content')) video_id = compat_str(video.get('basename') or video.get('content'))
details = self._download_xml( details = self._download_json(
'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php', 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
video_id, 'Downloading details XML', query={ video_id, 'Downloading details JSON', query={
'ak': 'web', 'ak': 'web',
'ptmd': 'true', 'ptmd': 'true',
'id': video_id, 'id': video_id,
'profile': 'player2', 'profile': 'player2',
}) })
title = title or xpath_text( title = title or details['title']
details, './/information/title', 'title', fatal=True) content_id = details['tracking']['nielsen']['content']['assetid']
content_id = xpath_text(
details, './/video/details/basename', 'content id', fatal=True)
info = self._extract_ptmd( info = self._extract_ptmd(
'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id, 'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
content_id, None, url) content_id, None, url)
timestamp = unified_timestamp(xpath_text(details, './/details/airtime')) duration = int_or_none(try_get(
details, lambda x: x['tracking']['nielsen']['content']['length']))
timestamp = unified_timestamp(details.get('editorialDate'))
series = try_get(
details, lambda x: x['tracking']['nielsen']['content']['program'],
compat_str)
episode = title if details.get('contentType') == 'episode' else None
thumbnails = [] thumbnails = []
for node in details.findall('.//teaserimages/teaserimage'): teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
thumbnail_url = node.text for thumbnail_key, thumbnail_url in teaser_images.items():
thumbnail_url = urljoin(url, thumbnail_url)
if not thumbnail_url: if not thumbnail_url:
continue continue
thumbnail = { thumbnail = {
'url': thumbnail_url, 'url': thumbnail_url,
} }
thumbnail_key = node.get('key')
if thumbnail_key:
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m: if m:
thumbnail['width'] = int(m.group(1)) thumbnail['width'] = int(m.group(1))
@ -118,11 +123,11 @@ class PhoenixIE(ZDFBaseIE):
return merge_dicts(info, { return merge_dicts(info, {
'id': content_id, 'id': content_id,
'title': title, 'title': title,
'description': xpath_text(details, './/information/detail'), 'description': details.get('leadParagraph'),
'duration': int_or_none(xpath_text(details, './/details/lengthSec')), 'duration': duration,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'timestamp': timestamp, 'timestamp': timestamp,
'uploader': xpath_text(details, './/details/channel'), 'uploader': details.get('tvService'),
'uploader_id': xpath_text(details, './/details/originChannelId'), 'series': series,
'channel': xpath_text(details, './/details/originChannelTitle'), 'episode': episode,
}) })

@ -0,0 +1,65 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
smuggle_url,
try_get,
)
class PlayStuffIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
'md5': 'c82d3669e5247c64bc382577843e5bd0',
'info_dict': {
'id': '6250584958001',
'ext': 'mp4',
'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
'uploader_id': '6005208634001',
'timestamp': 1619491027,
'upload_date': '20210427',
},
'add_ie': ['BrightcoveNew'],
}, {
# geo restricted, bypassable
'url': 'https://play.stuff.co.nz/details/_6155660351001',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
state = self._parse_json(
self._search_regex(
r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'),
video_id)
account_id = try_get(
state, lambda x: x['configurations']['accountId'],
compat_str) or '6005208634001'
player_id = try_get(
state, lambda x: x['configurations']['playerId'],
compat_str) or 'default'
entries = []
for item_id, video in state['items'].items():
if not isinstance(video, dict):
continue
asset_id = try_get(
video, lambda x: x['content']['attributes']['assetId'],
compat_str)
if not asset_id:
continue
entries.append(self.url_result(
smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id),
{'geo_countries': ['NZ']}),
'BrightcoveNew', video_id))
return self.playlist_result(entries, video_id)

@ -133,8 +133,10 @@ class RedBullEmbedIE(RedBullTVIE):
rrn_id = self._match_id(url) rrn_id = self._match_id(url)
asset_id = self._download_json( asset_id = self._download_json(
'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'}, rrn_id, headers={
query={ 'Accept': 'application/json',
'API-KEY': 'e90a1ff11335423998b100c929ecc866',
}, query={
'query': '''{ 'query': '''{
resource(id: "%s", enforceGeoBlocking: false) { resource(id: "%s", enforceGeoBlocking: false) {
%s %s

@ -21,6 +21,7 @@ from ..utils import (
class ShahidBaseIE(AWSIE): class ShahidBaseIE(AWSIE):
_AWS_PROXY_HOST = 'api2.shahid.net' _AWS_PROXY_HOST = 'api2.shahid.net'
_AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
_VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/'
def _handle_error(self, e): def _handle_error(self, e):
fail_data = self._parse_json( fail_data = self._parse_json(
@ -49,7 +50,7 @@ class ShahidBaseIE(AWSIE):
class ShahidIE(ShahidBaseIE): class ShahidIE(ShahidBaseIE):
_NETRC_MACHINE = 'shahid' _NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)' _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924', 'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
'info_dict': { 'info_dict': {
@ -73,6 +74,9 @@ class ShahidIE(ShahidBaseIE):
# shahid plus subscriber only # shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511', 'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
'only_matching': True 'only_matching': True
}, {
'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319',
'only_matching': True
}] }]
def _real_initialize(self): def _real_initialize(self):
@ -168,7 +172,7 @@ class ShahidIE(ShahidBaseIE):
class ShahidShowIE(ShahidBaseIE): class ShahidShowIE(ShahidBaseIE):
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)' _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187', 'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
'info_dict': { 'info_dict': {

@ -86,10 +86,10 @@ class SharedIE(SharedBaseIE):
class VivoIE(SharedBaseIE): class VivoIE(SharedBaseIE):
IE_DESC = 'vivo.sx' IE_DESC = 'vivo.sx'
_VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})' _VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
_FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed' _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
_TEST = { _TESTS = [{
'url': 'http://vivo.sx/d7ddda0e78', 'url': 'http://vivo.sx/d7ddda0e78',
'md5': '15b3af41be0b4fe01f4df075c2678b2c', 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
'info_dict': { 'info_dict': {
@ -98,7 +98,10 @@ class VivoIE(SharedBaseIE):
'title': 'Chicken', 'title': 'Chicken',
'filesize': 515659, 'filesize': 515659,
}, },
} }, {
'url': 'http://vivo.st/d7ddda0e78',
'only_matching': True,
}]
def _extract_title(self, webpage): def _extract_title(self, webpage):
title = self._html_search_regex( title = self._html_search_regex(

@ -300,6 +300,13 @@ class VKIE(VKBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_sibnet_urls(webpage):
# https://help.sibnet.ru/?sibnet_video_embed
return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
webpage)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid') video_id = mobj.group('videoid')
@ -408,6 +415,10 @@ class VKIE(VKBaseIE):
if odnoklassniki_url: if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key()) return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
sibnet_urls = self._extract_sibnet_urls(info_page)
if sibnet_urls:
return self.url_result(sibnet_urls[0])
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page) m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts: if m_opts:
m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1)) m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))

Loading…
Cancel
Save