Merge branch 'master' of https://github.com/ytdl-org/youtube-dl into mkvthumbnail
commit
aa9a04bab9
@ -0,0 +1,103 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from .vimeo import VimeoIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class AmaraIE(InfoExtractor):
    """Extractor for amara.org video pages.

    Metadata and subtitle tracks come from the Amara API; the media URL is
    the first entry of the API's ``all_urls`` list.  When that URL is one
    YoutubeIE or VimeoIE can handle, the result is handed over to that
    extractor as a ``url_transparent`` entry.
    """
    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the Amara video page at *url*."""
        video_id = self._match_id(url)
        api_url = 'https://amara.org/api/videos/%s/' % video_id
        meta = self._download_json(api_url, video_id, query={'format': 'json'})
        title = meta['title']
        # Only the first listed URL is used as the media location.
        video_url = meta['all_urls'][0]

        # Each published language is offered in the three formats requested below.
        subtitles = {}
        for lang_info in (meta.get('languages') or []):
            subtitles_uri = lang_info.get('subtitles_uri')
            if not subtitles_uri or not lang_info.get('published'):
                continue
            lang_code = lang_info.get('code') or 'en'
            subtitles.setdefault(lang_code, []).extend(
                {
                    'ext': sub_format,
                    'url': update_url_query(subtitles_uri, {'format': sub_format}),
                } for sub_format in ('json', 'srt', 'vtt'))

        info = {
            'id': video_id,
            'title': title,
            'url': video_url,
            'subtitles': subtitles,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # Delegate to the first hosting-site extractor that accepts the URL;
        # otherwise the URL is returned as-is in the info dict.
        delegate = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)), None)
        if delegate is not None:
            info.update({
                '_type': 'url_transparent',
                'ie_key': delegate.ie_key(),
            })

        return info
|
@ -0,0 +1,98 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
parse_iso8601,
|
||||
# try_get,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class BoxIE(InfoExtractor):
    """Extractor for files published through Box shared links.

    The flow is: read the request token from the shared page, exchange it for
    a read access token, fetch the file metadata from the Box API and expose
    the authenticated download URL as the single format.
    """
    _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
    _TEST = {
        'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
        'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
        'info_dict': {
            'id': '510727257538',
            'ext': 'mp4',
            'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
            'uploader': 'MLS Video',
            'timestamp': 1566320259,
            'upload_date': '20190820',
            'uploader_id': '235196876',
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the shared Box file at *url*."""
        mobj = re.match(self._VALID_URL, url)
        shared_name = mobj.group('shared_name')
        file_id = mobj.group('id')

        # The page embeds a `Box.config` JSON object carrying the request token.
        webpage = self._download_webpage(url, file_id)
        box_config = self._parse_json(
            self._search_regex(
                r'Box\.config\s*=\s*({.+?});', webpage, 'Box config'),
            file_id)
        request_token = box_config['requestToken']

        # Exchange the request token for a per-file read access token.
        tokens = self._download_json(
            'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
            'Downloading token JSON metadata',
            data=json.dumps({'fileIDs': [file_id]}).encode(),
            headers={
                'Content-Type': 'application/json',
                'X-Request-Token': request_token,
                'X-Box-EndUser-API': 'sharedName=' + shared_name,
            })
        access_token = tokens[file_id]['read']

        shared_link = 'https://app.box.com/s/' + shared_name
        file_meta = self._download_json(
            'https://api.box.com/2.0/files/' + file_id, file_id,
            'Downloading file JSON metadata',
            headers={
                'Authorization': 'Bearer ' + access_token,
                'BoxApi': 'shared_link=' + shared_link,
                'X-Rep-Hints': '[dash]',  # TODO: extract `hls` formats
            },
            query={
                'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
            })
        title = file_meta['name']

        # Credentials appended to the download URL.
        query = {
            'access_token': access_token,
            'shared_link': shared_link
        }

        formats = []

        # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
        #     entry_url_template = try_get(
        #         entry, lambda x: x['content']['url_template'])
        #     if not entry_url_template:
        #         continue
        #     representation = entry.get('representation')
        #     if representation == 'dash':
        #         TODO: append query to every fragment URL
        #         formats.extend(self._extract_mpd_formats(
        #             entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
        #             file_id, query=query))

        download_url = file_meta.get('authenticated_download_url')
        if download_url and file_meta.get('is_download_available'):
            formats.append({
                'format_id': 'download',
                'url': update_url_query(download_url, query),
                'ext': file_meta.get('extension') or determine_ext(title),
                'filesize': file_meta.get('size'),
            })

        self._sort_formats(formats)

        creator = file_meta.get('created_by') or {}

        return {
            'id': file_id,
            'title': title,
            'formats': formats,
            'description': file_meta.get('description') or None,
            'timestamp': parse_iso8601(file_meta.get('created_at')),
            'uploader': creator.get('name'),
            'uploader_id': creator.get('id'),
        }
|
@ -0,0 +1,91 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class LBRYIE(InfoExtractor):
    """Extractor for video and audio streams on lbry.tv / odysee.com.

    The page URL is converted to an ``lbry://`` URI and resolved through the
    lbry.tv JSON-RPC proxy; a second proxy call yields the streaming URL.
    """
    IE_NAME = 'lbry.tv'
    # Both the channel and the stream claim id may be several characters
    # long.  The trailing quantifier is required: without it only the first
    # character of the stream claim id would be captured, and the truncated
    # prefix could resolve to a different claim.
    _VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z]+)'
    _TESTS = [{
        # Video
        'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
        'md5': '65bd7ec1f6744ada55da8e4c48a2edf9',
        'info_dict': {
            'id': '17f983b61f53091fb8ea58a9c56804e4ff8cff4d',
            'ext': 'mp4',
            'title': 'First day in LBRY? Start HERE!',
            'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
            'timestamp': 1595694354,
            'upload_date': '20200725',
        }
    }, {
        # Audio
        'url': 'https://lbry.tv/@LBRYFoundation:0/Episode-1:e',
        'md5': 'c94017d3eba9b49ce085a8fad6b98d00',
        'info_dict': {
            'id': 'e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
            'ext': 'mp3',
            'title': 'The LBRY Foundation Community Podcast Episode 1 - Introduction, Streaming on LBRY, Transcoding',
            'description': 'md5:661ac4f1db09f31728931d7b88807a61',
            'timestamp': 1591312601,
            'upload_date': '20200604',
        }
    }, {
        'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
        'only_matching': True,
    }, {
        'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
        'only_matching': True,
    }]

    def _call_api_proxy(self, method, display_id, params):
        """POST a JSON-RPC request to the lbry.tv proxy and return its ``result`` member.

        *method* and *params* are sent as the JSON body; *display_id* is only
        used to label the download.  Raises KeyError when the response
        carries no ``result`` key.
        """
        return self._download_json(
            'https://api.lbry.tv/api/v1/proxy', display_id,
            headers={'Content-Type': 'application/json-rpc'},
            data=json.dumps({
                'method': method,
                'params': params,
            }).encode())['result']

    def _real_extract(self, url):
        """Return the info dict for the stream at *url*.

        Raises ExtractorError (expected) when the resolved claim is neither
        a video nor an audio stream.
        """
        # The site URL separates name and claim id with ':', the URI with '#'.
        display_id = self._match_id(url).replace(':', '#')
        uri = 'lbry://' + display_id
        result = self._call_api_proxy(
            'resolve', display_id, {'urls': [uri]})[uri]
        result_value = result['value']
        if result_value.get('stream_type') not in ('video', 'audio'):
            raise ExtractorError('Unsupported URL', expected=True)
        streaming_url = self._call_api_proxy(
            'get', display_id, {'uri': uri})['streaming_url']
        # Optional sub-objects default to empty dicts so lookups below stay safe.
        source = result_value.get('source') or {}
        media = result_value.get('video') or result_value.get('audio') or {}
        signing_channel = result_value.get('signing_channel') or {}

        return {
            'id': result['claim_id'],
            'title': result_value['title'],
            'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
            'description': result_value.get('description'),
            'license': result_value.get('license'),
            'timestamp': int_or_none(result.get('timestamp')),
            'tags': result_value.get('tags'),
            'width': int_or_none(media.get('width')),
            'height': int_or_none(media.get('height')),
            'duration': int_or_none(media.get('duration')),
            'channel': signing_channel.get('name'),
            'channel_id': signing_channel.get('claim_id'),
            # Prefer the extension of the source file name, else map the MIME type.
            'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
            'filesize': int_or_none(source.get('size')),
            'url': streaming_url,
        }
|
@ -0,0 +1,176 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class PinterestBaseIE(InfoExtractor):
    """Shared helpers for the Pinterest extractors."""
    _VALID_URL_BASE = r'https?://(?:[^/]+\.)?pinterest\.(?:com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'

    def _extract_resource(self, webpage, video_id):
        """Return the ``resourceResponses`` value from the page's ``initial-state`` JSON."""
        initial_state = self._search_regex(
            r'<script[^>]+\bid=["\']initial-state["\'][^>]*>({.+?})</script>',
            webpage, 'application json')
        return self._parse_json(initial_state, video_id)['resourceResponses']

    def _extract_video(self, data, extract_formats=True):
        """Build an info dict from a pin's *data* dict.

        With ``extract_formats=False`` the format list is left empty and
        neither format extraction nor sorting is performed.
        """
        video_id = data['id']

        # Fall back to the id when the pin carries no usable title.
        title = (data.get('title') or data.get('grid_title') or video_id).strip()

        formats = []
        duration = None
        if extract_formats:
            for format_id, format_dict in data['videos']['video_list'].items():
                if not isinstance(format_dict, dict):
                    continue
                format_url = url_or_none(format_dict.get('url'))
                if not format_url:
                    continue
                # The duration value is divided by 1000 (scale=1000).
                duration = float_or_none(format_dict.get('duration'), scale=1000)
                ext = determine_ext(format_url)
                if 'hls' in format_id.lower() or ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False))
                    continue
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'width': int_or_none(format_dict.get('width')),
                    'height': int_or_none(format_dict.get('height')),
                    'duration': duration,
                })
            self._sort_formats(
                formats, field_preference=('height', 'width', 'tbr', 'format_id'))

        def _attribution(field):
            # String-valued field of the pin's `closeup_attribution`, if any.
            return try_get(data, lambda x: x['closeup_attribution'][field], compat_str)

        # Collect every image entry that carries a valid URL.
        thumbnails = []
        images = data.get('images')
        if isinstance(images, dict):
            for image in images.values():
                if not isinstance(image, dict):
                    continue
                image_url = url_or_none(image.get('url'))
                if not image_url:
                    continue
                thumbnails.append({
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

        return {
            'id': video_id,
            'title': title,
            'description': data.get('description') or data.get('description_html') or data.get('seo_description'),
            'duration': duration,
            'timestamp': unified_timestamp(data.get('created_at')),
            'thumbnails': thumbnails,
            'uploader': _attribution('full_name'),
            'uploader_id': _attribution('id'),
            'repost_count': int_or_none(data.get('repin_count')),
            'comment_count': int_or_none(data.get('comment_count')),
            'categories': try_get(data, lambda x: x['pin_join']['visual_annotation'], list),
            'tags': data.get('hashtags'),
            'formats': formats,
            'extractor_key': PinterestIE.ie_key(),
        }
|
||||
|
||||
|
||||
class PinterestIE(PinterestBaseIE):
    """Extractor for a single Pinterest pin (``/pin/<id>``)."""
    _VALID_URL = r'%s/pin/(?P<id>\d+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.com/pin/664281013778109217/',
        'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
        'info_dict': {
            'id': '664281013778109217',
            'ext': 'mp4',
            'title': 'Origami',
            'description': 'md5:b9d90ddf7848e897882de9e73344f7dd',
            'duration': 57.7,
            'timestamp': 1593073622,
            'upload_date': '20200625',
            'uploader': 'Love origami -I am Dafei',
            'uploader_id': '586523688879454212',
            'repost_count': 50,
            'comment_count': 0,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://co.pinterest.com/pin/824721750502199491/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the pin at *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The pin's data is read from the first resource response on the page.
        resources = self._extract_resource(webpage, video_id)
        pin_data = resources[0]['response']['data']
        return self._extract_video(pin_data)
|
||||
|
||||
|
||||
class PinterestCollectionIE(PinterestBaseIE):
    """Extractor for Pinterest boards (``/<user>/<board>``), returned as a playlist."""
    _VALID_URL = r'%s/[^/]+/(?P<id>[^/?#&]+)' % PinterestBaseIE._VALID_URL_BASE
    _TESTS = [{
        'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
        'info_dict': {
            'id': '585890301462791043',
            'title': 'cool diys',
        },
        'playlist_count': 8,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL PinterestIE accepts; otherwise apply the normal check."""
        if PinterestIE.suitable(url):
            return False
        return super(PinterestCollectionIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist result with one entry per pin of the board at *url*."""
        collection_name = self._match_id(url)
        webpage = self._download_webpage(url, collection_name)
        # The board's pins are read from the second resource response.
        resource = self._extract_resource(webpage, collection_name)[1]

        entries = []
        for item in resource['response']['data']:
            if not isinstance(item, dict) or item.get('type') != 'pin':
                continue
            if not item.get('id'):
                continue
            # Some pins may not be available anonymously via pin URL
            # video = self._extract_video(item, extract_formats=False)
            # video.update({
            #     '_type': 'url_transparent',
            #     'url': 'https://www.pinterest.com/pin/%s/' % video_id,
            # })
            # entries.append(video)
            entries.append(self._extract_video(item))

        board_title = try_get(
            resource, lambda x: x['options']['board_title'], compat_str)
        board_id = try_get(
            resource, lambda x: x['options']['board_id'], compat_str)
        # Fall back to the URL slug when the board id is not available.
        return self.playlist_result(
            entries, playlist_id=board_id or collection_name,
            playlist_title=board_title)
|
@ -0,0 +1,67 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class RumbleEmbedIE(InfoExtractor):
    """Extractor for rumble.com embed URLs, backed by the ``embedJS`` JSON endpoint."""
    _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'https://rumble.com/embed/v5pv5f',
        'md5': '36a18a049856720189f30977ccbb2c34',
        'info_dict': {
            'id': 'v5pv5f',
            'ext': 'mp4',
            'title': 'WMAR 2 News Latest Headlines | October 20, 6pm',
            'timestamp': 1571611968,
            'upload_date': '20191020',
        }
    }, {
        'url': 'https://rumble.com/embed/ufe9n.v5pv5f',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the embedded Rumble video at *url*."""
        video_id = self._match_id(url)
        video = self._download_json(
            'https://rumble.com/embedJS/', video_id,
            query={'request': 'video', 'v': video_id})
        title = video['title']

        formats = []
        # `ua` maps a height to a sequence; items 0 and 1 are read as URLs
        # and items 2 and 3 as the matching bitrate containers.
        for height, variants in (video.get('ua') or {}).items():
            for idx in range(2):
                media_url = try_get(variants, lambda x: x[idx], compat_str)
                if not media_url:
                    continue
                ext = determine_ext(media_url)
                fmt = {
                    'url': media_url,
                    'ext': ext,
                    'format_id': '%s-%sp' % (ext, height),
                    'height': int_or_none(height),
                }
                bitrate = try_get(variants, lambda x: x[idx + 2]['bitrate'])
                if bitrate:
                    fmt['tbr'] = int_or_none(bitrate)
                formats.append(fmt)
        self._sort_formats(formats)

        author = video.get('author') or {}

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video.get('i'),
            'timestamp': parse_iso8601(video.get('pubDate')),
            'duration': int_or_none(video.get('duration')),
            'channel': author.get('name'),
            'channel_url': author.get('url'),
        }
|
@ -0,0 +1,239 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
compat_str,
|
||||
compat_parse_qs,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
dict_get,
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
unified_timestamp,
|
||||
)
|
||||
|
||||
|
||||
class SkyItPlayerIE(InfoExtractor):
    """Extractor for player.sky.it and base class for the other Sky Italia extractors.

    Subclasses resolve a page to a numeric video id and redirect to the
    player URL built from ``_PLAYER_TMPL`` and their ``_DOMAIN``.
    """
    IE_NAME = 'player.sky.it'
    _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
    _GEO_BYPASS = False
    _DOMAIN = 'sky'
    _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
    # http://static.sky.it/static/skyplayer/conf.json
    _TOKEN_MAP = {
        'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q',
        'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C',
        'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota',
        'salesforce': 'C6D585FD1615272C98DE38235F38BD86',
        'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE',
        'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk',
        'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3',
        'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd',
        'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp',
    }

    def _player_url_result(self, video_id):
        """Return a url_result pointing at the player page for *video_id* and this class's domain."""
        player_url = self._PLAYER_TMPL % (video_id, self._DOMAIN)
        return self.url_result(player_url, SkyItPlayerIE.ie_key(), video_id)

    def _parse_video(self, video, video_id):
        """Build the info dict from the API's *video* metadata dict."""
        title = video['title']
        is_live = video.get('type') == 'live'
        # Live and on-demand entries use different keys for the stream URL
        # and for the geo-block flag.
        hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
        geo_blocked = video.get('geoblock' if is_live else 'geob')
        if not hls_url and geo_blocked:
            self.raise_geo_restricted(countries=['IT'])

        if is_live:
            formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4')
        else:
            formats = self._extract_akamai_formats(
                hls_url, video_id, {'http': 'videoplatform.sky.it'})
        self._sort_formats(formats)

        if is_live:
            title = self._live_title(title)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
            'description': video.get('short_desc') or None,
            'timestamp': unified_timestamp(video.get('create_date')),
            'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')),
            'is_live': is_live,
        }

    def _real_extract(self, url):
        """Return the info dict for the player URL *url*."""
        video_id = self._match_id(url)
        # The optional `domain` query parameter selects the API token;
        # the 'sky' token is the fallback.
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        domain = url_query.get('domain', [None])[0]
        token = dict_get(self._TOKEN_MAP, (domain, 'sky'))
        video = self._download_json(
            'https://apid.sky.it/vdp/v1/getVideoData',
            video_id,
            query={
                'caller': 'sky',
                'id': video_id,
                'token': token
            },
            headers=self.geo_verification_headers())
        return self._parse_video(video, video_id)
|
||||
|
||||
|
||||
class SkyItVideoIE(SkyItPlayerIE):
    """Extractor for video pages on video/masterchef/xfactor.sky.it.

    The numeric id at the end of the URL is forwarded to the player extractor.
    """
    IE_NAME = 'video.sky.it'
    _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        }
    }, {
        'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820',
        'only_matching': True,
    }, {
        'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the player URL for the video id embedded in *url*."""
        return self._player_url_result(self._match_id(url))
|
||||
|
||||
|
||||
class SkyItVideoLiveIE(SkyItPlayerIE):
    """Extractor for live streams under video.sky.it/diretta/."""
    IE_NAME = 'video.sky.it:live'
    _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://video.sky.it/diretta/tg24',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the live page at *url*."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The asset id is read from the page's `__NEXT_DATA__` JSON blob.
        next_data = self._parse_json(
            self._search_regex(
                r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
                webpage, 'next data'),
            display_id)
        live_content = next_data['props']['initialState']['livePage']['content']
        asset_id = compat_str(live_content['asset_id'])
        livestream = self._download_json(
            'https://apid.sky.it/vdp/v1/getLivestream',
            asset_id, query={'id': asset_id})
        return self._parse_video(livestream, asset_id)
|
||||
|
||||
|
||||
class SkyItIE(SkyItPlayerIE):
    """Extractor for dated article pages on sport.sky.it and tg24.sky.it.

    The video id is located in the page with ``_VIDEO_ID_REGEX`` and then
    forwarded to the player extractor; subclasses override the regex and
    ``_DOMAIN``.
    """
    IE_NAME = 'sky.it'
    _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    _VIDEO_ID_REGEX = r'data-videoid="(\d+)"'
    _TESTS = [{
        'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol',
        'info_dict': {
            'id': '631201',
            'ext': 'mp4',
            'title': 'Un rosso alla violenza: in campo per i diritti delle donne',
            'upload_date': '20201121',
            'timestamp': 1605995753,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo',
        'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd',
        'info_dict': {
            'id': '631227',
            'ext': 'mp4',
            'title': 'Uomo ucciso da uno squalo in Australia',
            'timestamp': 1606036192,
            'upload_date': '20201122',
        },
    }]

    def _real_extract(self, url):
        """Find the video id in the page at *url* and redirect to the player URL."""
        display_id = self._match_id(url)
        page = self._download_webpage(url, display_id)
        video_id = self._search_regex(self._VIDEO_ID_REGEX, page, 'video id')
        return self._player_url_result(video_id)
|
||||
|
||||
|
||||
class SkyItAcademyIE(SkyItIE):
    """Extractor for dated skyacademy.it pages; uses the 'skyacademy' player domain."""
    IE_NAME = 'skyacademy.it'
    _DOMAIN = 'skyacademy'
    _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
    # The video id is taken from an element id attribute in the page markup.
    _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"'
    _TESTS = [{
        'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/',
        'md5': 'ced5c26638b7863190cbc44dd6f6ba08',
        'info_dict': {
            'id': '523458',
            'ext': 'mp4',
            'title': 'Sky Academy "The Best CineCamp 2019"',
            'timestamp': 1562843784,
            'upload_date': '20190711',
        }
    }]
|
||||
|
||||
|
||||
class SkyItArteIE(SkyItIE):
    """Extractor for arte.sky.it video pages; uses the 'skyarte' player domain."""
    IE_NAME = 'arte.sky.it'
    _DOMAIN = 'skyarte'
    _VALID_URL = r'https?://arte\.sky\.it/video/(?P<id>[^/?&#]+)'
    # The video id is taken from the `id` parameter of the embedded player iframe.
    _VIDEO_ID_REGEX = r'(?s)<iframe[^>]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)'
    _TESTS = [{
        'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/',
        'md5': '515aee97b87d7a018b6c80727d3e7e17',
        'info_dict': {
            'id': '627926',
            'ext': 'mp4',
            'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani",
            'upload_date': '20201106',
            'timestamp': 1604664493,
        }
    }]
|
||||
|
||||
|
||||
class CieloTVItIE(SkyItIE):
    """Extractor for cielotv.it video pages; uses the 'cielo' player domain."""
    IE_NAME = 'cielotv.it'
    _DOMAIN = 'cielo'
    _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P<id>[^.]+)\.html'
    # The video id is taken from a `videoId = "<digits>"` assignment in the page.
    _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"'
    _TESTS = [{
        'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html',
        'md5': 'c4deed77552ba901c2a0d9258320304b',
        'info_dict': {
            'id': '499240',
            'ext': 'mp4',
            'title': 'Il lunedì è sempre un dramma',
            'upload_date': '20190329',
            'timestamp': 1553862178,
        }
    }]
|
||||
|
||||
|
||||
class TV8ItIE(SkyItVideoIE):
    """Extractor for tv8.it ``/showvideo/<id>`` pages; uses the 'mtv8' player domain."""
    IE_NAME = 'tv8.it'
    _DOMAIN = 'mtv8'
    _VALID_URL = r'https?://tv8\.it/showvideo/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/',
        'md5': '9ab906a3f75ea342ed928442f9dabd21',
        'info_dict': {
            'id': '630529',
            'ext': 'mp4',
            'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero',
            'timestamp': 1605721374,
            'upload_date': '20201118',
        }
    }]
|
@ -1,17 +0,0 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .nexx import NexxIE
|
||||
|
||||
|
||||
class SpiegeltvIE(InfoExtractor):
    """Extractor for spiegel.tv video pages; hands the numeric id over to NexxIE."""
    _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Return a url_result for the Nexx API URL built from the id in *url*."""
        video_id = self._match_id(url)
        nexx_url = 'https://api.nexx.cloud/v3/748/videos/byid/%s' % video_id
        return self.url_result(nexx_url, ie=NexxIE.ie_key())
|
@ -1,74 +1,24 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
from .nbc import NBCIE
|
||||
|
||||
|
||||
class USANetworkIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?usanetwork\.com/(?:[^/]+/videos|movies)/(?P<id>[^/?#]+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.usanetwork.com/mrrobot/videos/hpe-cybersecurity',
|
||||
'md5': '33c0d2ba381571b414024440d08d57fd',
|
||||
class USANetworkIE(NBCIE):
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?usanetwork\.com/[^/]+/video/[^/]+/(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.usanetwork.com/peacock-trailers/video/intelligence-trailer/4185302',
|
||||
'info_dict': {
|
||||
'id': '3086229',
|
||||
'id': '4185302',
|
||||
'ext': 'mp4',
|
||||
'title': 'HPE Cybersecurity',
|
||||
'description': 'The more we digitize our world, the more vulnerable we are.',
|
||||
'upload_date': '20160818',
|
||||
'timestamp': 1471535460,
|
||||
'uploader': 'NBCU-USA',
|
||||
'title': 'Intelligence (Trailer)',
|
||||
'description': 'A maverick NSA agent enlists the help of a junior systems analyst in a workplace power grab.',
|
||||
'upload_date': '20200715',
|
||||
'timestamp': 1594785600,
|
||||
'uploader': 'NBCU-MPAT',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
def _x(name, default=NO_DEFAULT):
|
||||
return self._search_regex(
|
||||
r'data-%s\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
|
||||
webpage, name, default=default, group='value')
|
||||
|
||||
video_id = _x('mpx-guid')
|
||||
title = _x('episode-title')
|
||||
mpx_account_id = _x('mpx-account-id', '2304992029')
|
||||
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
}
|
||||
if _x('is-full-episode', None) == '1':
|
||||
query['manifest'] = 'm3u'
|
||||
|
||||
if _x('is-entitlement', None) == '1':
|
||||
adobe_pass = {}
|
||||
drupal_settings = self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
||||
webpage, 'drupal settings', fatal=False)
|
||||
if drupal_settings:
|
||||
drupal_settings = self._parse_json(drupal_settings, video_id, fatal=False)
|
||||
if drupal_settings:
|
||||
adobe_pass = drupal_settings.get('adobePass', {})
|
||||
resource = self._get_mvpd_resource(
|
||||
adobe_pass.get('adobePassResourceId', 'usa'),
|
||||
title, video_id, _x('episode-rating', 'TV-14'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, adobe_pass.get('adobePassRequestorId', 'usa'), resource)
|
||||
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
info.update({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/HNK2IC/media/guid/%s/%s' % (mpx_account_id, video_id),
|
||||
query), {'force_smil_url': True}),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'series': _x('show-title', None),
|
||||
'episode': title,
|
||||
'ie_key': 'ThePlatform',
|
||||
})
|
||||
return info
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2020.09.20'
|
||||
__version__ = '2020.11.21.1'
|
||||
|
Loading…
Reference in New Issue