Update to 7f3c90ab252e212c67c8aa80d2e83fb9b686ee6d

[ctv] Add new extractor 7f3c90ab25
Branch: pull/280/head
Author: pukkandan
Parent: 99dc455151
Commit: 3221ffac20

extractor/brightcove.py

@@ -28,6 +28,7 @@ from ..utils import (
     parse_iso8601,
     smuggle_url,
     str_or_none,
+    try_get,
     unescapeHTML,
     unsmuggle_url,
     UnsupportedError,
@@ -609,24 +610,27 @@ class BrightcoveNewIE(AdobePassIE):
         store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)

         def extract_policy_key():
-            webpage = self._download_webpage(
-                'http://players.brightcove.net/%s/%s_%s/index.min.js'
-                % (account_id, player_id, embed), video_id)
-
-            policy_key = None
-
-            catalog = self._search_regex(
-                r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
-            if catalog:
-                catalog = self._parse_json(
-                    js_to_json(catalog), video_id, fatal=False)
-                if catalog:
-                    policy_key = catalog.get('policyKey')
-
-            if not policy_key:
-                policy_key = self._search_regex(
-                    r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
-                    webpage, 'policy key', group='pk')
+            base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
+            config = self._download_json(
+                base_url + 'config.json', video_id, fatal=False) or {}
+            policy_key = try_get(
+                config, lambda x: x['video_cloud']['policy_key'])
+            if not policy_key:
+                webpage = self._download_webpage(
+                    base_url + 'index.min.js', video_id)
+
+                catalog = self._search_regex(
+                    r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+                if catalog:
+                    catalog = self._parse_json(
+                        js_to_json(catalog), video_id, fatal=False)
+                    if catalog:
+                        policy_key = catalog.get('policyKey')
+
+                if not policy_key:
+                    policy_key = self._search_regex(
+                        r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+                        webpage, 'policy key', group='pk')

             store_pk(policy_key)
             return policy_key
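
Note: the rewritten extract_policy_key() prefers the player's config.json and only falls back to scraping index.min.js. A rough standalone sketch of that lookup order, using only the stdlib; fetch_policy_key is a hypothetical name, and youtube-dl's js_to_json() is approximated here with json.loads, which only succeeds when the catalog blob happens to be valid JSON:

import json
import re
from urllib.request import urlopen

def fetch_policy_key(account_id, player_id, embed='default'):
    base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
    # 1. Preferred source: the player's config.json
    try:
        config = json.load(urlopen(base_url + 'config.json'))
        policy_key = config.get('video_cloud', {}).get('policy_key')
        if policy_key:
            return policy_key
    except Exception:
        pass
    # 2. Fallback: scrape index.min.js, first via the catalog({...}) call ...
    js = urlopen(base_url + 'index.min.js').read().decode('utf-8', 'replace')
    m = re.search(r'catalog\(({.+?})\);', js)
    if m:
        try:
            policy_key = json.loads(m.group(1)).get('policyKey')
            if policy_key:
                return policy_key
        except ValueError:
            pass
    # 3. ... then via a bare policyKey: "..." literal
    m = re.search(r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', js)
    return m.group('pk') if m else None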

extractor/cbslocal.py

@@ -11,7 +11,47 @@ from ..utils import (

 class CBSLocalIE(AnvatoIE):
-    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
+    _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
+    _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+        'info_dict': {
+            'id': '3580809',
+            'ext': 'mp4',
+            'title': 'A Very Blue Anniversary',
+            'description': 'CBS2’s Cindy Hsu has more.',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': int,
+            'upload_date': r're:^\d{8}$',
+            'uploader': 'CBS',
+            'subtitles': {
+                'en': 'mincount:5',
+            },
+            'categories': [
+                'Stations\\Spoken Word\\WCBSTV',
+                'Syndication\\AOL',
+                'Syndication\\MSN',
+                'Syndication\\NDN',
+                'Syndication\\Yahoo',
+                'Content\\News',
+                'Content\\News\\Local News',
+            ],
+            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
+    def _real_extract(self, url):
+        mcp_id = self._match_id(url)
+        return self.url_result(
+            'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
+
+
+class CBSLocalArticleIE(AnvatoIE):
+    _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'

     _TESTS = [{
         # Anvato backend
@@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
             # m3u8 download
             'skip_download': True,
         },
-    }, {
-        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
-        'info_dict': {
-            'id': '3580809',
-            'ext': 'mp4',
-            'title': 'A Very Blue Anniversary',
-            'description': 'CBS2’s Cindy Hsu has more.',
-            'thumbnail': 're:^https?://.*',
-            'timestamp': int,
-            'upload_date': r're:^\d{8}$',
-            'uploader': 'CBS',
-            'subtitles': {
-                'en': 'mincount:5',
-            },
-            'categories': [
-                'Stations\\Spoken Word\\WCBSTV',
-                'Syndication\\AOL',
-                'Syndication\\MSN',
-                'Syndication\\NDN',
-                'Syndication\\Yahoo',
-                'Content\\News',
-                'Content\\News\\Local News',
-            ],
-            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
-        },
     }]

     def _real_extract(self, url):

extractor/ctv.py (new file)

@@ -0,0 +1,52 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class CTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
+        'info_dict': {
+            'id': '2102249',
+            'ext': 'flv',
+            'title': 'Wednesday, December 23, 2020',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
+            'timestamp': 1608732000,
+            'upload_date': '20201223',
+            'series': 'Your Morning',
+            'season': '2020-2021',
+            'season_number': 5,
+            'episode_number': 88,
+            'tags': ['Your Morning'],
+            'categories': ['Talk Show'],
+            'duration': 7467.126,
+        },
+    }, {
+        'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        content = self._download_json(
+            'https://www.ctv.ca/space-graphql/graphql', display_id, query={
+                'query': '''{
+  resolvedPath(path: "/%s") {
+    lastSegment {
+      content {
+        ... on AxisContent {
+          axisId
+          videoPlayerDestCode
+        }
+      }
+    }
+  }
+}''' % display_id,
+            })['data']['resolvedPath']['lastSegment']['content']
+        video_id = content['axisId']
+        return self.url_result(
+            '9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id),
+            'NineCNineMedia', video_id)
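
Note: CTVIE does no media extraction of its own; it resolves the page path to an axisId and videoPlayerDestCode with one GraphQL lookup and hands off to NineCNineMediaIE as 9c9media:<destCode>:<axisId>. A standalone sketch of that lookup, assuming the endpoint still answers as in the diff; the urllib plumbing and the resolve_ctv_path name are illustrative:

import json
from urllib.parse import urlencode
from urllib.request import urlopen

def resolve_ctv_path(display_id):
    # Same query the extractor sends, passed as a GET parameter
    query = '''{
  resolvedPath(path: "/%s") {
    lastSegment {
      content {
        ... on AxisContent {
          axisId
          videoPlayerDestCode
        }
      }
    }
  }
}''' % display_id
    url = 'https://www.ctv.ca/space-graphql/graphql?' + urlencode({'query': query})
    content = json.load(urlopen(url))['data']['resolvedPath']['lastSegment']['content']
    # The extractor forwards this pair as 9c9media:<videoPlayerDestCode>:<axisId>
    return content['videoPlayerDestCode'], content['axisId']

print(resolve_ctv_path('shows/your-morning/wednesday-december-23-2020-s5e88'))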

extractor/extractors.py

@@ -171,7 +171,10 @@ from .cbc import (
     CBCOlympicsIE,
 )
 from .cbs import CBSIE
-from .cbslocal import CBSLocalIE
+from .cbslocal import (
+    CBSLocalIE,
+    CBSLocalArticleIE,
+)
 from .cbsinteractive import CBSInteractiveIE
 from .cbsnews import (
     CBSNewsEmbedIE,

@@ -249,6 +252,7 @@ from .crunchyroll import (
 )
 from .cspan import CSpanIE
 from .ctsnews import CtsNewsIE
+from .ctv import CTVIE
 from .ctvnews import CTVNewsIE
 from .cultureunplugged import CultureUnpluggedIE
 from .curiositystream import (

extractor/facebook.py

@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import json
 import re
 import socket
@@ -8,6 +9,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_etree_fromstring,
     compat_http_client,
+    compat_str,
     compat_urllib_error,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
@@ -47,7 +49,8 @@ class FacebookIE(InfoExtractor):
                             )\?(?:.*?)(?:v|video_id|story_fbid)=|
                             [^/]+/videos/(?:[^/]+/)?|
                             [^/]+/posts/|
-                            groups/[^/]+/permalink/
+                            groups/[^/]+/permalink/|
+                            watchparty/
                         )|
                     facebook:
                 )
@@ -280,8 +283,18 @@ class FacebookIE(InfoExtractor):
         # data.video.creation_story.attachments[].media
         'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
         'only_matching': True,
+    }, {
+        'url': 'https://www.facebook.com/watchparty/211641140192478',
+        'info_dict': {
+            'id': '211641140192478',
+        },
+        'playlist_count': 1,
+        'skip': 'Requires logging in',
     }]
     _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
+    _api_config = {
+        'graphURI': '/api/graphql/'
+    }

     @staticmethod
     def _extract_urls(webpage):
@@ -405,6 +418,17 @@ class FacebookIE(InfoExtractor):

         self._sort_formats(formats)

+        def extract_relay_data(_filter):
+            return self._parse_json(self._search_regex(
+                r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
+                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
+
+        def extract_relay_prefetched_data(_filter):
+            replay_data = extract_relay_data(_filter)
+            for require in (replay_data.get('require') or []):
+                if require[0] == 'RelayPrefetchedStreamCache':
+                    return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
+
         if not video_data:
             server_js_data = self._parse_json(self._search_regex([
                 r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
@@ -413,87 +437,83 @@ class FacebookIE(InfoExtractor):
             video_data = extract_from_jsmods_instances(server_js_data)

         if not video_data:
-            graphql_data = self._parse_json(self._search_regex(
-                r'handleWithCustomApplyEach\([^,]+,\s*({.*?"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+".*?})\);',
-                webpage, 'graphql data', default='{}'), video_id, fatal=False) or {}
-            for require in (graphql_data.get('require') or []):
-                if require[0] == 'RelayPrefetchedStreamCache':
-                    entries = []
-
-                    def parse_graphql_video(video):
-                        formats = []
-                        q = qualities(['sd', 'hd'])
-                        for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
-                            playable_url = video.get('playable_url' + suffix)
-                            if not playable_url:
-                                continue
-                            formats.append({
-                                'format_id': format_id,
-                                'quality': q(format_id),
-                                'url': playable_url,
-                            })
-                        extract_dash_manifest(video, formats)
-                        process_formats(formats)
-                        v_id = video.get('videoId') or video.get('id') or video_id
-                        info = {
-                            'id': v_id,
-                            'formats': formats,
-                            'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
-                            'uploader_id': try_get(video, lambda x: x['owner']['id']),
-                            'timestamp': int_or_none(video.get('publish_time')),
-                            'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
-                        }
-                        description = try_get(video, lambda x: x['savable_description']['text'])
-                        title = video.get('name')
-                        if title:
-                            info.update({
-                                'title': title,
-                                'description': description,
-                            })
-                        else:
-                            info['title'] = description or 'Facebook video #%s' % v_id
-                        entries.append(info)
-
-                    def parse_attachment(attachment, key='media'):
-                        media = attachment.get(key) or {}
-                        if media.get('__typename') == 'Video':
-                            return parse_graphql_video(media)
-
-                    data = try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
-
-                    nodes = data.get('nodes') or []
-                    node = data.get('node') or {}
-                    if not nodes and node:
-                        nodes.append(node)
-                    for node in nodes:
-                        story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
-                        attachments = try_get(story, [
-                            lambda x: x['attached_story']['attachments'],
-                            lambda x: x['attachments']
-                        ], list) or []
-                        for attachment in attachments:
-                            attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
-                            ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
-                            for n in ns:
-                                parse_attachment(n)
-                            parse_attachment(attachment)
-
-                    edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
-                    for edge in edges:
-                        parse_attachment(edge, key='node')
-
-                    video = data.get('video') or {}
-                    if video:
-                        attachments = try_get(video, [
-                            lambda x: x['story']['attachments'],
-                            lambda x: x['creation_story']['attachments']
-                        ], list) or []
-                        for attachment in attachments:
-                            parse_attachment(attachment)
-                        if not entries:
-                            parse_graphql_video(video)
-
-                    return self.playlist_result(entries, video_id)
+            data = extract_relay_prefetched_data(
+                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
+            if data:
+                entries = []
+
+                def parse_graphql_video(video):
+                    formats = []
+                    q = qualities(['sd', 'hd'])
+                    for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
+                        playable_url = video.get('playable_url' + suffix)
+                        if not playable_url:
+                            continue
+                        formats.append({
+                            'format_id': format_id,
+                            'quality': q(format_id),
+                            'url': playable_url,
+                        })
+                    extract_dash_manifest(video, formats)
+                    process_formats(formats)
+                    v_id = video.get('videoId') or video.get('id') or video_id
+                    info = {
+                        'id': v_id,
+                        'formats': formats,
+                        'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
+                        'uploader_id': try_get(video, lambda x: x['owner']['id']),
+                        'timestamp': int_or_none(video.get('publish_time')),
+                        'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
+                    }
+                    description = try_get(video, lambda x: x['savable_description']['text'])
+                    title = video.get('name')
+                    if title:
+                        info.update({
+                            'title': title,
+                            'description': description,
+                        })
+                    else:
+                        info['title'] = description or 'Facebook video #%s' % v_id
+                    entries.append(info)
+
+                def parse_attachment(attachment, key='media'):
+                    media = attachment.get(key) or {}
+                    if media.get('__typename') == 'Video':
+                        return parse_graphql_video(media)
+
+                nodes = data.get('nodes') or []
+                node = data.get('node') or {}
+                if not nodes and node:
+                    nodes.append(node)
+                for node in nodes:
+                    story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
+                    attachments = try_get(story, [
+                        lambda x: x['attached_story']['attachments'],
+                        lambda x: x['attachments']
+                    ], list) or []
+                    for attachment in attachments:
+                        attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
+                        ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
+                        for n in ns:
+                            parse_attachment(n)
+                        parse_attachment(attachment)
+
+                edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
+                for edge in edges:
+                    parse_attachment(edge, key='node')
+
+                video = data.get('video') or {}
+                if video:
+                    attachments = try_get(video, [
+                        lambda x: x['story']['attachments'],
+                        lambda x: x['creation_story']['attachments']
+                    ], list) or []
+                    for attachment in attachments:
+                        parse_attachment(attachment)
+                    if not entries:
+                        parse_graphql_video(video)
+
+                return self.playlist_result(entries, video_id)

         if not video_data:
             m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
@@ -504,6 +524,43 @@ class FacebookIE(InfoExtractor):
             elif '>You must log in to continue' in webpage:
                 self.raise_login_required()

+        if not video_data and '/watchparty/' in url:
+            post_data = {
+                'doc_id': 3731964053542869,
+                'variables': json.dumps({
+                    'livingRoomID': video_id,
+                }),
+            }
+
+            prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
+            if prefetched_data:
+                lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
+                if lsd:
+                    post_data[lsd['name']] = lsd['value']
+
+            relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
+            for define in (relay_data.get('define') or []):
+                if define[0] == 'RelayAPIConfigDefaults':
+                    self._api_config = define[2]
+
+            living_room = self._download_json(
+                urljoin(url, self._api_config['graphURI']), video_id,
+                data=urlencode_postdata(post_data))['data']['living_room']
+
+            entries = []
+            for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
+                video = try_get(edge, lambda x: x['node']['video']) or {}
+                v_id = video.get('id')
+                if not v_id:
+                    continue
+                v_id = compat_str(v_id)
+                entries.append(self.url_result(
+                    self._VIDEO_PAGE_TEMPLATE % v_id,
+                    self.ie_key(), v_id, video.get('name')))
+
+            return self.playlist_result(entries, video_id)
+
         if not video_data:
             # Video info not in first request, do a secondary request using
             # tahoe player specific URL
             tahoe_data = self._download_webpage(
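
Note: both the new relay helpers and the watchparty branch hinge on the bigPipe "require" list embedded in the page. A minimal sketch of the RelayPrefetchedStreamCache walk that extract_relay_prefetched_data() performs, run against a hand-made stand-in for the JSON youtube-dl scrapes out of handleWithCustomApplyEach(...):

# Stand-in payload; the real one is parsed out of the webpage
relay_payload = {
    'require': [
        ['SomethingElse', None, None, []],
        ['RelayPrefetchedStreamCache', None, None,
         [None, {'__bbox': {'result': {'data': {'video': {'id': '123'}}}}}]],
    ]
}

def extract_prefetched(payload):
    # Mirrors try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict)
    for require in payload.get('require') or []:
        if require[0] == 'RelayPrefetchedStreamCache':
            try:
                return require[3][1]['__bbox']['result']['data']
            except (IndexError, KeyError, TypeError):
                return {}
    return {}

print(extract_prefetched(relay_payload))  # {'video': {'id': '123'}}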

extractor/ninecninemedia.py

@@ -5,10 +5,11 @@ import re

 from .common import InfoExtractor
 from ..utils import (
-    parse_iso8601,
-    float_or_none,
     ExtractorError,
+    float_or_none,
     int_or_none,
+    parse_iso8601,
+    try_get,
 )

@@ -35,7 +36,7 @@ class NineCNineMediaIE(InfoExtractor):
                 '$include': '[HasClosedCaptions]',
             })

-        if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
+        if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
             raise ExtractorError('This video is DRM protected.', expected=True)

         manifest_base_url = content_package_url + 'manifest.'

@@ -52,7 +53,7 @@ class NineCNineMediaIE(InfoExtractor):
         self._sort_formats(formats)

         thumbnails = []
-        for image in content.get('Images', []):
+        for image in (content.get('Images') or []):
             image_url = image.get('Url')
             if not image_url:
                 continue

@@ -70,7 +71,7 @@ class NineCNineMediaIE(InfoExtractor):
                     continue
                 container.append(e_name)

-        season = content.get('Season', {})
+        season = content.get('Season') or {}

         info = {
             'id': content_id,

@@ -79,13 +80,14 @@ class NineCNineMediaIE(InfoExtractor):
             'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
             'episode_number': int_or_none(content.get('Episode')),
             'season': season.get('Name'),
-            'season_number': season.get('Number'),
+            'season_number': int_or_none(season.get('Number')),
             'season_id': season.get('Id'),
-            'series': content.get('Media', {}).get('Name'),
+            'series': try_get(content, lambda x: x['Media']['Name']),
             'tags': tags,
             'categories': categories,
             'duration': float_or_none(content_package.get('Duration')),
             'formats': formats,
+            'thumbnails': thumbnails,
         }

         if content_package.get('HasClosedCaptions'):
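
Note: the cleanups in this file replace chained .get(..., {}) lookups with try_get(), which returns None instead of raising when any step along the path is missing or of the wrong shape. Roughly what utils.try_get does, sketched standalone (the real helper also accepts a list of getters):

def try_get(src, getter, expected_type=None):
    # Swallow the errors a broken path would raise and return None instead
    try:
        v = getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        return None
    if expected_type is None or isinstance(v, expected_type):
        return v
    return None

content_package = {'Constraints': {'Security': {'Type': 'widevine'}}}
print(try_get(content_package, lambda x: x['Constraints']['Security']['Type']))  # 'widevine'
print(try_get({}, lambda x: x['Constraints']['Security']['Type']))  # None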

extractor/sonyliv.py

@@ -1,40 +1,112 @@
 # coding: utf-8
 from __future__ import unicode_literals

-from .common import InfoExtractor
-from ..utils import smuggle_url
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+)


 class SonyLIVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-(?P<id>\d+)'
     _TESTS = [{
-        'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
+        'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
         'info_dict': {
-            'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
-            'id': 'ref:5024612095001',
+            'title': 'Bachelors Delight - Achaari Cheese Toast',
+            'id': '1000022678',
             'ext': 'mp4',
-            'upload_date': '20170923',
-            'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
-            'uploader_id': '5182475815001',
-            'timestamp': 1506200547,
+            'upload_date': '20200411',
+            'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
+            'timestamp': 1586632091,
+            'duration': 185,
+            'season_number': 1,
+            'episode': 'Achaari Cheese Toast',
+            'episode_number': 1,
+            'release_year': 2016,
         },
         'params': {
             'skip_download': True,
         },
-        'add_ie': ['BrightcoveNew'],
     }, {
-        'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
+        'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
         'only_matching': True,
     }]
+    _GEO_COUNTRIES = ['IN']
+    _TOKEN = None

-    # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
+    def _call_api(self, version, path, video_id):
+        headers = {}
+        if self._TOKEN:
+            headers['security_token'] = self._TOKEN
+        try:
+            return self._download_json(
+                'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
+                video_id, headers=headers)['resultObj']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                message = self._parse_json(
+                    e.cause.read().decode(), video_id)['message']
+                if message == 'Geoblocked Country':
+                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+                raise ExtractorError(message)
+            raise
+
+    def _real_initialize(self):
+        self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)

     def _real_extract(self, url):
-        brightcove_id = self._match_id(url)
-        return self.url_result(
-            smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
-                'geo_countries': ['IN'],
-                'referrer': url,
-            }),
-            'BrightcoveNew', brightcove_id)
+        video_id = self._match_id(url)
+        content = self._call_api(
+            '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
+        if content.get('isEncrypted'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+        dash_url = content['videoURL']
+        headers = {
+            'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
+        }
+        formats = self._extract_mpd_formats(
+            dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
+        formats.extend(self._extract_m3u8_formats(
+            dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
+            video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
+        for f in formats:
+            f.setdefault('http_headers', {}).update(headers)
+        self._sort_formats(formats)
+
+        metadata = self._call_api(
+            '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
+        title = metadata['title']
+        episode = metadata.get('episodeTitle')
+        if episode and title != episode:
+            title += ' - ' + episode
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': content.get('posterURL'),
+            'description': metadata.get('longDescription') or metadata.get('shortDescription'),
+            'timestamp': int_or_none(metadata.get('creationDate'), 1000),
+            'duration': int_or_none(metadata.get('duration')),
+            'season_number': int_or_none(metadata.get('season')),
+            'episode': episode,
+            'episode_number': int_or_none(metadata.get('episodeNumber')),
+            'release_year': int_or_none(metadata.get('year')),
+        }
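
Note: the same headers dict is passed to the manifest downloads and also copied into each format's http_headers, because SonyLIV expects the x-playback-session-id header on segment requests too. Its value is just a random UUID plus epoch milliseconds; a small sketch of the construction used above:

import time
import uuid

def playback_session_id():
    # hex UUID + current epoch milliseconds, as in the extractor
    return '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)

print(playback_session_id())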

extractor/streetvoice.py

@@ -2,25 +2,40 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import unified_strdate
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+    str_or_none,
+    strip_or_none,
+    try_get,
+    urljoin,
+)


 class StreetVoiceIE(InfoExtractor):
     _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://streetvoice.com/skippylu/songs/94440/',
-        'md5': '15974627fc01a29e492c98593c2fd472',
+        'url': 'https://streetvoice.com/skippylu/songs/123688/',
+        'md5': '0eb535970629a5195685355f3ed60bfd',
         'info_dict': {
-            'id': '94440',
+            'id': '123688',
             'ext': 'mp3',
-            'title': '輸',
-            'description': 'Crispy脆樂團 - 輸',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 260,
-            'upload_date': '20091018',
+            'title': '流浪',
+            'description': 'md5:8eb0bfcc9dcd8aa82bd6efca66e3fea6',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 270,
+            'upload_date': '20100923',
             'uploader': 'Crispy脆樂團',
             'uploader_id': '627810',
+            'uploader_url': 're:^https?://streetvoice.com/skippylu/',
+            'timestamp': 1285261661,
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+            'repost_count': int,
+            'track': '流浪',
+            'track_id': '123688',
+            'album': '2010',
         }
     }, {
         'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',

@@ -29,21 +44,57 @@ class StreetVoiceIE(InfoExtractor):

     def _real_extract(self, url):
         song_id = self._match_id(url)
+        base_url = 'https://streetvoice.com/api/v4/song/%s/' % song_id
+        song = self._download_json(base_url, song_id, query={
+            'fields': 'album,comments_count,created_at,id,image,length,likes_count,name,nickname,plays_count,profile,share_count,synopsis,user,username',
+        })
+        title = song['name']

-        song = self._download_json(
-            'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
+        formats = []
+        for suffix, format_id in [('hls/file', 'hls'), ('file', 'http'), ('file/original', 'original')]:
+            f_url = (self._download_json(
+                base_url + suffix + '/', song_id,
+                'Downloading %s format URL' % format_id,
+                data=b'', fatal=False) or {}).get('file')
+            if not f_url:
+                continue
+            f = {
+                'ext': 'mp3',
+                'format_id': format_id,
+                'url': f_url,
+                'vcodec': 'none',
+            }
+            if format_id == 'hls':
+                f['protocol'] = 'm3u8_native'
+            abr = self._search_regex(r'\.mp3\.(\d+)k', f_url, 'bitrate', default=None)
+            if abr:
+                abr = int(abr)
+                f.update({
+                    'abr': abr,
+                    'tbr': abr,
+                })
+            formats.append(f)

-        title = song['name']
-        author = song['user']['nickname']
+        user = song.get('user') or {}
+        username = user.get('username')
+        get_count = lambda x: int_or_none(song.get(x + '_count'))

         return {
             'id': song_id,
-            'url': song['file'],
+            'formats': formats,
             'title': title,
-            'description': '%s - %s' % (author, title),
-            'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
-            'duration': song.get('length'),
-            'upload_date': unified_strdate(song.get('created_at')),
-            'uploader': author,
-            'uploader_id': compat_str(song['user']['id']),
+            'description': strip_or_none(song.get('synopsis')),
+            'thumbnail': song.get('image'),
+            'duration': int_or_none(song.get('length')),
+            'timestamp': parse_iso8601(song.get('created_at')),
+            'uploader': try_get(user, lambda x: x['profile']['nickname']),
+            'uploader_id': str_or_none(user.get('id')),
+            'uploader_url': urljoin(url, '/%s/' % username) if username else None,
+            'view_count': get_count('plays'),
+            'like_count': get_count('likes'),
+            'comment_count': get_count('comments'),
+            'repost_count': get_count('share'),
+            'track': title,
+            'track_id': song_id,
+            'album': try_get(song, lambda x: x['album']['name']),
         }
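
Note: the v4 API returns plain file URLs per format, and the abr/tbr values above are recovered from the .mp3.<N>k marker inside those URLs. A quick check of that regex on a made-up URL of the expected shape (the host and path here are hypothetical):

import re

def bitrate_from_url(f_url):
    m = re.search(r'\.mp3\.(\d+)k', f_url)
    return int(m.group(1)) if m else None

print(bitrate_from_url('https://cdn.example.com/song/123688/file.mp3.128k/playlist.m3u8'))  # 128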

extractor/toggle.py

@@ -200,7 +200,7 @@ class ToggleIE(InfoExtractor):

 class MeWatchIE(InfoExtractor):
     IE_NAME = 'mewatch'
-    _VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[0-9a-zA-Z-]+-(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
     _TESTS = [{
         'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
         'info_dict': {

@@ -214,6 +214,12 @@ class MeWatchIE(InfoExtractor):
         'params': {
             'skip_download': 'm3u8 download',
         },
+    }, {
+        'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
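
Note: the only functional change for mewatch is the loosened slug pattern, which now also accepts non-ASCII and percent-encoded characters before the numeric ID. A quick check of the new regex against the test URLs:

import re

pattern = r'https?://(?:www\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
for u in (
    'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
    'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-搜密。打卡。小红点-S2-E1-176232',
):
    print(re.match(pattern, u).group('id'))  # 179371, then 176232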

extractor/youtube.py

@@ -516,7 +516,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
     }
-    _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')  # TODO 'json3' raising issues with automatic captions
+    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

     _GEO_BYPASS = False

@@ -1348,17 +1348,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return self._parse_json(
             uppercase_escape(config), video_id, fatal=False)

-    def _get_automatic_captions(self, video_id, webpage):
+    def _get_automatic_captions(self, video_id, player_response, player_config):
         """We need the webpage for getting the captions url, pass it as an
         argument to speed up the process."""
         self.to_screen('%s: Looking for automatic captions' % video_id)
-        player_config = self._get_ytplayer_config(video_id, webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
-        if not player_config:
+        if not (player_response or player_config):
             self._downloader.report_warning(err_msg)
             return {}
         try:
-            args = player_config['args']
+            args = player_config.get('args') if player_config else {}
             caption_url = args.get('ttsurl')
             if caption_url:
                 timestamp = args['timestamp']

@@ -1417,19 +1416,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 return captions

             # New captions format as of 22.06.2017
-            player_response = args.get('player_response')
-            if player_response and isinstance(player_response, compat_str):
-                player_response = self._parse_json(
-                    player_response, video_id, fatal=False)
-                if player_response:
-                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
-                    base_url = renderer['captionTracks'][0]['baseUrl']
-                    sub_lang_list = []
-                    for lang in renderer['translationLanguages']:
-                        lang_code = lang.get('languageCode')
-                        if lang_code:
-                            sub_lang_list.append(lang_code)
-                    return make_captions(base_url, sub_lang_list)
+            if player_response:
+                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+                base_url = renderer['captionTracks'][0]['baseUrl']
+                sub_lang_list = []
+                for lang in renderer['translationLanguages']:
+                    lang_code = lang.get('languageCode')
+                    if lang_code:
+                        sub_lang_list.append(lang_code)
+                return make_captions(base_url, sub_lang_list)

             # Some videos don't provide ttsurl but rather caption_tracks and
             # caption_translation_languages (e.g. 20LmZk1hakA)

@@ -2366,7 +2361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # subtitles
         video_subtitles = self.extract_subtitles(
             video_id, video_webpage, has_live_chat_replay)
-        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
+        automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)

         video_duration = try_get(
             video_info, lambda x: int_or_none(x['length_seconds'][0]))
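
Note: _get_automatic_captions() now receives the already-parsed player_response directly instead of re-fetching ytplayer config from the webpage. The fields it reads from that response, shown on a hand-made stand-in (the timedtext baseUrl is a placeholder):

player_response = {
    'captions': {
        'playerCaptionsTracklistRenderer': {
            'captionTracks': [{'baseUrl': 'https://www.youtube.com/api/timedtext?...'}],
            'translationLanguages': [
                {'languageCode': 'en'}, {'languageCode': 'de'}, {'languageCode': ''},
            ],
        },
    },
}

renderer = player_response['captions']['playerCaptionsTracklistRenderer']
base_url = renderer['captionTracks'][0]['baseUrl']
sub_lang_list = [l['languageCode'] for l in renderer['translationLanguages'] if l.get('languageCode')]
print(base_url, sub_lang_list)  # ... ['en', 'de']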
