Update to release 2020.12.09

pull/280/head
pukkandan 5 years ago
parent c7a13929f8
commit 4fc9353242

@ -430,7 +430,8 @@
- **la7.it** - **la7.it**
- **laola1tv** - **laola1tv**
- **laola1tv:embed** - **laola1tv:embed**
- **lbry.tv** - **lbry**
- **lbry:channel**
- **LCI** - **LCI**
- **Lcp** - **Lcp**
- **LcpPlay** - **LcpPlay**
@ -911,6 +912,7 @@
- **TeleQuebecEmission** - **TeleQuebecEmission**
- **TeleQuebecLive** - **TeleQuebecLive**
- **TeleQuebecSquat** - **TeleQuebecSquat**
- **TeleQuebecVideo**
- **TeleTask** - **TeleTask**
- **Telewebion** - **Telewebion**
- **TennisTV** - **TennisTV**

@ -108,6 +108,18 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
def test_parse_html5_media_entries(self): def test_parse_html5_media_entries(self):
# inline video tag
expect_dict(
self,
self.ie._parse_html5_media_entries(
'https://127.0.0.1/video.html',
r'<html><video src="/vid.mp4" /></html>', None)[0],
{
'formats': [{
'url': 'https://127.0.0.1/vid.mp4',
}],
})
# from https://www.r18.com/ # from https://www.r18.com/
# with kbps in label # with kbps in label
expect_dict( expect_dict(

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
@ -11,25 +13,22 @@ from ..utils import (
class AMCNetworksIE(ThePlatformIE): class AMCNetworksIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', 'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
'md5': '',
'info_dict': { 'info_dict': {
'id': 's3MX01Nl4vPH', 'id': '4Lq1dzOnZGt0',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Maron - Season 4 - Step 1', 'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.', 'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
'age_limit': 17, 'upload_date': '20201120',
'upload_date': '20160505', 'timestamp': 1605904350,
'timestamp': 1462468831,
'uploader': 'AMCN', 'uploader': 'AMCN',
}, },
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'skip': 'Requires TV provider accounts',
}, { }, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True, 'only_matching': True,
@ -55,32 +54,33 @@ class AMCNetworksIE(ThePlatformIE):
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1', 'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
'only_matching': True, 'only_matching': True,
}] }]
_REQUESTOR_ID_MAP = {
'amc': 'AMC',
'bbcamerica': 'BBCA',
'ifc': 'IFC',
'sundancetv': 'SUNDANCE',
'wetv': 'WETV',
}
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) site, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) requestor_id = self._REQUESTOR_ID_MAP[site]
properties = self._download_json(
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
display_id)['data']['properties']
query = { query = {
'mbr': 'true', 'mbr': 'true',
'manifest': 'm3u', 'manifest': 'm3u',
} }
media_url = self._search_regex( tp_path = 'M_UwQC/media/' + properties['videoPid']
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', media_url = 'https://link.theplatform.com/s/' + tp_path
webpage, 'media url') theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
r'link\.theplatform\.com/s/([^?]+)',
media_url, 'theplatform_path'), display_id)
info = self._parse_theplatform_metadata(theplatform_metadata) info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid'] video_id = theplatform_metadata['pid']
title = theplatform_metadata['title'] title = theplatform_metadata['title']
rating = try_get( rating = try_get(
theplatform_metadata, lambda x: x['ratings'][0]['rating']) theplatform_metadata, lambda x: x['ratings'][0]['rating'])
auth_required = self._search_regex( if properties.get('videoCategory') == 'TVE-Auth':
r'window\.authRequired\s*=\s*(true|false);',
webpage, 'auth required')
if auth_required == 'true':
requestor_id = self._search_regex(
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
webpage, 'requestor id')
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
requestor_id, title, video_id, rating) requestor_id, title, video_id, rating)
query['auth'] = self._extract_mvpd_auth( query['auth'] = self._extract_mvpd_auth(

@ -1,33 +1,33 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none,
js_to_json,
try_get, try_get,
unified_strdate, unified_strdate,
) )
class AmericasTestKitchenIE(InfoExtractor): class AmericasTestKitchenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers', 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f', 'md5': 'b861c3e365ac38ad319cfd509c30577f',
'info_dict': { 'info_dict': {
'id': '5b400b9ee338f922cb06450c', 'id': '5b400b9ee338f922cb06450c',
'title': 'Weeknight Japanese Suppers', 'title': 'Japanese Suppers',
'ext': 'mp4', 'ext': 'mp4',
'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8', 'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://', 'thumbnail': r're:^https?://',
'timestamp': 1523664000, 'timestamp': 1523664000,
'upload_date': '20180414', 'upload_date': '20180414',
'release_date': '20180414', 'release_date': '20180410',
'series': "America's Test Kitchen", 'series': "America's Test Kitchen",
'season_number': 18, 'season_number': 18,
'episode': 'Weeknight Japanese Suppers', 'episode': 'Japanese Suppers',
'episode_number': 15, 'episode_number': 15,
}, },
'params': { 'params': {
@ -36,47 +36,31 @@ class AmericasTestKitchenIE(InfoExtractor):
}, { }, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon', 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
'only_matching': True,
}, {
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) resource_type, video_id = re.match(self._VALID_URL, url).groups()
is_episode = resource_type == 'episode'
webpage = self._download_webpage(url, video_id) if is_episode:
resource_type = 'episodes'
video_data = self._parse_json( resource = self._download_json(
self._search_regex( 'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>', video = resource['video'] if is_episode else resource
webpage, 'initial context'), episode = resource if is_episode else resource.get('episode') or {}
video_id, js_to_json)
ep_data = try_get(
video_data,
(lambda x: x['episodeDetail']['content']['data'],
lambda x: x['videoDetail']['content']['data']), dict)
ep_meta = ep_data.get('full_video', {})
zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
title = ep_data.get('title') or ep_meta.get('title')
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
'description') or ep_meta.get('description'))
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
release_date = unified_strdate(ep_data.get('aired_at'))
season_number = int_or_none(ep_meta.get('season_number'))
episode = ep_meta.get('title')
episode_number = int_or_none(ep_meta.get('episode_number'))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id, 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'ie_key': 'Zype', 'ie_key': 'Zype',
'title': title, 'description': clean_html(video.get('description')),
'description': description, 'release_date': unified_strdate(video.get('publishDate')),
'thumbnail': thumbnail, 'series': try_get(episode, lambda x: x['show']['title']),
'release_date': release_date, 'episode': episode.get('title'),
'series': "America's Test Kitchen",
'season_number': season_number,
'episode': episode,
'episode_number': episode_number,
} }

@ -2516,9 +2516,9 @@ class InfoExtractor(object):
# https://www.ampproject.org/docs/reference/components/amp-video) # https://www.ampproject.org/docs/reference/components/amp-video)
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/ # For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)' _MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
media_tags = [(media_tag, media_type, '') media_tags = [(media_tag, media_tag_name, media_type, '')
for media_tag, media_type for media_tag, media_tag_name, media_type
in re.findall(r'(?s)(<%s[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)] in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
media_tags.extend(re.findall( media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'. # We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see # Allowing more characters may end up in significant slow down (see

@ -555,7 +555,10 @@ from .laola1tv import (
EHFTVIE, EHFTVIE,
ITTFIE, ITTFIE,
) )
from .lbry import LBRYIE from .lbry import (
LBRYIE,
LBRYChannelIE,
)
from .lci import LCIIE from .lci import LCIIE
from .lcp import ( from .lcp import (
LcpPlayIE, LcpPlayIE,
@ -1192,6 +1195,7 @@ from .telequebec import (
TeleQuebecSquatIE, TeleQuebecSquatIE,
TeleQuebecEmissionIE, TeleQuebecEmissionIE,
TeleQuebecLiveIE, TeleQuebecLiveIE,
TeleQuebecVideoIE,
) )
from .teletask import TeleTaskIE from .teletask import TeleTaskIE
from .telewebion import TelewebionIE from .telewebion import TelewebionIE

@ -2114,23 +2114,23 @@ class GenericIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{ # {
# Zype embed # # Zype embed
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites', # 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
'info_dict': { # 'info_dict': {
'id': '5b400b834b32992a310622b9', # 'id': '5b400b834b32992a310622b9',
'ext': 'mp4', # 'ext': 'mp4',
'title': 'Smoky Barbecue Favorites', # 'title': 'Smoky Barbecue Favorites',
'thumbnail': r're:^https?://.*\.jpe?g', # 'thumbnail': r're:^https?://.*\.jpe?g',
'description': 'md5:5ff01e76316bd8d46508af26dc86023b', # 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
'upload_date': '20170909', # 'upload_date': '20170909',
'timestamp': 1504915200, # 'timestamp': 1504915200,
}, # },
'add_ie': [ZypeIE.ie_key()], # 'add_ie': [ZypeIE.ie_key()],
'params': { # 'params': {
'skip_download': True, # 'skip_download': True,
}, # },
}, # },
{ {
# videojs embed # videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904', 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import functools
import json import json
from .common import InfoExtractor from .common import InfoExtractor
@ -10,13 +11,73 @@ from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
OnDemandPagedList,
try_get, try_get,
urljoin,
) )
class LBRYIE(InfoExtractor): class LBRYBaseIE(InfoExtractor):
IE_NAME = 'lbry.tv' _BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
_VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z])' _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
_OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
_SUPPORTED_STREAM_TYPES = ['video', 'audio']
def _call_api_proxy(self, method, display_id, params, resource):
    """Call a method of the LBRY JSON-RPC proxy and return its result.

    method: name of the remote method, sent as the 'method' field.
    display_id: identifier passed to _download_json for this request.
    params: value sent as the 'params' field of the request body.
    resource: short label interpolated into the download note.

    Returns the 'result' member of the decoded response.
    Raises ExtractorError when the response carries an 'error' member.
    """
    response = self._download_json(
        'https://api.lbry.tv/api/v1/proxy',
        display_id, 'Downloading %s JSON metadata' % resource,
        headers={'Content-Type': 'application/json-rpc'},
        data=json.dumps({
            'method': method,
            'params': params,
        }).encode())
    # A failed JSON-RPC call is reported through an 'error' member and
    # has no 'result'; report the server's message instead of letting a
    # bare KeyError escape.
    error = response.get('error')
    if error:
        message = error.get('message') if isinstance(error, dict) else error
        raise ExtractorError(
            'LBRY API error: %s' % message, expected=True)
    return response['result']
def _resolve_url(self, url, display_id, resource):
    """Resolve a single URL through the API proxy.

    Sends a 'resolve' call with `url` as the 'urls' parameter and
    returns the entry of the result that is keyed by that same URL.
    """
    resolved = self._call_api_proxy(
        'resolve', display_id, {'urls': url}, resource)
    return resolved[url]
def _permanent_url(self, url, claim_name, claim_id):
return urljoin(url, '/%s:%s' % (claim_name, claim_id))
def _parse_stream(self, stream, url):
    """Build the metadata part of an info dict from a stream claim.

    stream: claim dict; its 'value', 'signing_channel' and 'timestamp'
        members are read, and each of them may be missing.
    url: URL handed to _permanent_url when building the channel URL.

    Returns a dict of metadata fields. Audio streams get 'vcodec' set to
    'none'; every other stream type gets 'width' and 'height' instead.
    """
    value = stream.get('value') or {}
    kind = value.get('stream_type')
    # Type-specific metadata is looked up under a key named after the type.
    details = value.get(kind) or {}
    file_info = value.get('source') or {}

    channel = stream.get('signing_channel') or {}
    channel_id = channel.get('claim_id')
    channel_name = channel.get('name')
    # The channel URL is only built when both name and claim id are known.
    channel_url = (
        self._permanent_url(url, channel_name, channel_id)
        if channel_name and channel_id else None)

    metadata = {
        'thumbnail': try_get(value, lambda x: x['thumbnail']['url'], compat_str),
        'description': value.get('description'),
        'license': value.get('license'),
        'timestamp': int_or_none(stream.get('timestamp')),
        'tags': value.get('tags'),
        'duration': int_or_none(details.get('duration')),
        'channel': try_get(channel, lambda x: x['value']['title']),
        'channel_id': channel_id,
        'channel_url': channel_url,
        'ext': determine_ext(file_info.get('name')) or mimetype2ext(file_info.get('media_type')),
        'filesize': int_or_none(file_info.get('size')),
    }

    if kind == 'audio':
        metadata['vcodec'] = 'none'
        return metadata

    metadata['width'] = int_or_none(details.get('width'))
    metadata['height'] = int_or_none(details.get('height'))
    return metadata
class LBRYIE(LBRYBaseIE):
IE_NAME = 'lbry'
_VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
_TESTS = [{ _TESTS = [{
# Video # Video
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1', 'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
@ -28,6 +89,8 @@ class LBRYIE(InfoExtractor):
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51', 'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
'timestamp': 1595694354, 'timestamp': 1595694354,
'upload_date': '20200725', 'upload_date': '20200725',
'width': 1280,
'height': 720,
} }
}, { }, {
# Audio # Audio
@ -40,6 +103,12 @@ class LBRYIE(InfoExtractor):
'description': 'md5:661ac4f1db09f31728931d7b88807a61', 'description': 'md5:661ac4f1db09f31728931d7b88807a61',
'timestamp': 1591312601, 'timestamp': 1591312601,
'upload_date': '20200604', 'upload_date': '20200604',
'tags': list,
'duration': 2570,
'channel': 'The LBRY Foundation',
'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
'vcodec': 'none',
} }
}, { }, {
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e', 'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
@ -47,45 +116,99 @@ class LBRYIE(InfoExtractor):
}, { }, {
'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b", 'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
'only_matching': True,
}, {
'url': 'https://lbry.tv/$/embed/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
'only_matching': True,
}, {
'url': 'https://lbry.tv/Episode-1:e7',
'only_matching': True,
}, {
'url': 'https://lbry.tv/@LBRYFoundation/Episode-1',
'only_matching': True,
}, {
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
'only_matching': True,
}] }]
def _call_api_proxy(self, method, display_id, params):
return self._download_json(
'https://api.lbry.tv/api/v1/proxy', display_id,
headers={'Content-Type': 'application/json-rpc'},
data=json.dumps({
'method': method,
'params': params,
}).encode())['result']
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url).replace(':', '#') display_id = self._match_id(url)
if display_id.startswith('$/'):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
display_id = display_id.replace(':', '#')
uri = 'lbry://' + display_id uri = 'lbry://' + display_id
result = self._call_api_proxy( result = self._resolve_url(uri, display_id, 'stream')
'resolve', display_id, {'urls': [uri]})[uri]
result_value = result['value'] result_value = result['value']
if result_value.get('stream_type') not in ('video', 'audio'): if result_value.get('stream_type') not in self._SUPPORTED_STREAM_TYPES:
raise ExtractorError('Unsupported URL', expected=True) raise ExtractorError('Unsupported URL', expected=True)
claim_id = result['claim_id']
title = result_value['title']
streaming_url = self._call_api_proxy( streaming_url = self._call_api_proxy(
'get', display_id, {'uri': uri})['streaming_url'] 'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
source = result_value.get('source') or {} info = self._parse_stream(result, url)
media = result_value.get('video') or result_value.get('audio') or {} info.update({
signing_channel = result_value.get('signing_channel') or {} 'id': claim_id,
'title': title,
return {
'id': result['claim_id'],
'title': result_value['title'],
'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
'description': result_value.get('description'),
'license': result_value.get('license'),
'timestamp': int_or_none(result.get('timestamp')),
'tags': result_value.get('tags'),
'width': int_or_none(media.get('width')),
'height': int_or_none(media.get('height')),
'duration': int_or_none(media.get('duration')),
'channel': signing_channel.get('name'),
'channel_id': signing_channel.get('claim_id'),
'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
'filesize': int_or_none(source.get('size')),
'url': streaming_url, 'url': streaming_url,
} })
return info
class LBRYChannelIE(LBRYBaseIE):
    """Extractor that turns a channel URL into a lazily paged playlist."""

    IE_NAME = 'lbry:channel'
    _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
    _TESTS = [{
        'url': 'https://lbry.tv/@LBRYFoundation:0',
        'info_dict': {
            'id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
            'title': 'The LBRY Foundation',
            'description': 'Channel for the LBRY Foundation. Follow for updates and news.',
        },
        'playlist_count': 29,
    }, {
        'url': 'https://lbry.tv/@LBRYFoundation',
        'only_matching': True,
    }]
    # Number of claims requested per claim_search call.
    _PAGE_SIZE = 50

    def _fetch_page(self, claim_id, url, page):
        """Yield one url-type entry per usable stream claim on a page.

        The incoming page index is incremented by one before it is sent
        to the API and shown in the progress note. Claims lacking a
        name or a claim id are skipped.
        """
        page_number = page + 1
        search_params = {
            'channel_ids': [claim_id],
            'claim_type': 'stream',
            'no_totals': True,
            'page': page_number,
            'page_size': self._PAGE_SIZE,
            'stream_types': self._SUPPORTED_STREAM_TYPES,
        }
        response = self._call_api_proxy(
            'claim_search', claim_id, search_params, 'page %d' % page_number)
        for claim in response.get('items') or []:
            name = claim.get('name')
            stream_id = claim.get('claim_id')
            if not name or not stream_id:
                # Both parts are needed to build the entry's permanent URL.
                continue
            entry = self._parse_stream(claim, url)
            entry.update({
                '_type': 'url',
                'id': stream_id,
                'title': try_get(claim, lambda x: x['value']['title']),
                'url': self._permanent_url(url, name, stream_id),
            })
            yield entry

    def _real_extract(self, url):
        """Resolve the channel claim and return its playlist result."""
        # The matched id has ':' replaced by '#' before it is put in the lbry:// URI.
        channel_ref = self._match_id(url).replace(':', '#')
        channel = self._resolve_url(
            'lbry://' + channel_ref, channel_ref, 'channel')
        channel_id = channel['claim_id']
        channel_value = channel.get('value') or {}
        page_fetcher = functools.partial(self._fetch_page, channel_id, url)
        return self.playlist_result(
            OnDemandPagedList(page_fetcher, self._PAGE_SIZE),
            channel_id, channel_value.get('title'),
            channel_value.get('description'))

@ -2,7 +2,12 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import smuggle_url from ..utils import (
bool_or_none,
smuggle_url,
try_get,
url_or_none,
)
class SlidesLiveIE(InfoExtractor): class SlidesLiveIE(InfoExtractor):
@ -18,8 +23,21 @@ class SlidesLiveIE(InfoExtractor):
'description': 'Watch full version of this video at https://slideslive.com/38902413.', 'description': 'Watch full version of this video at https://slideslive.com/38902413.',
'uploader': 'SlidesLive Videos - A', 'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
'timestamp': 1597615266,
'upload_date': '20170925', 'upload_date': '20170925',
} }
}, {
# video_service_name = yoda
'url': 'https://slideslive.com/38935785',
'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
'info_dict': {
'id': 'RMraDYN5ozA_',
'ext': 'mp4',
'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
},
'params': {
'format': 'bestvideo',
},
}, { }, {
# video_service_name = youtube # video_service_name = youtube
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
@ -39,18 +57,47 @@ class SlidesLiveIE(InfoExtractor):
video_data = self._download_json( video_data = self._download_json(
'https://ben.slideslive.com/player/' + video_id, video_id) 'https://ben.slideslive.com/player/' + video_id, video_id)
service_name = video_data['video_service_name'].lower() service_name = video_data['video_service_name'].lower()
assert service_name in ('url', 'vimeo', 'youtube') assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
service_id = video_data['video_service_id'] service_id = video_data['video_service_id']
subtitles = {}
for sub in try_get(video_data, lambda x: x['subtitles'], list) or []:
if not isinstance(sub, dict):
continue
webvtt_url = url_or_none(sub.get('webvtt_url'))
if not webvtt_url:
continue
lang = sub.get('language') or 'en'
subtitles.setdefault(lang, []).append({
'url': webvtt_url,
})
info = { info = {
'id': video_id, 'id': video_id,
'thumbnail': video_data.get('thumbnail'), 'thumbnail': video_data.get('thumbnail'),
'url': service_id, 'is_live': bool_or_none(video_data.get('is_live')),
'subtitles': subtitles,
} }
if service_name == 'url': if service_name in ('url', 'yoda'):
info['title'] = video_data['title'] info['title'] = video_data['title']
if service_name == 'url':
info['url'] = service_id
else:
formats = []
_MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
formats.extend(self._extract_m3u8_formats(
_MANIFEST_PATTERN % (service_id, 'm3u8'), service_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_mpd_formats(
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
mpd_id='dash', fatal=False))
self._sort_formats(formats)
info.update({
'id': service_id,
'formats': formats,
})
else: else:
info.update({ info.update({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': service_id,
'ie_key': service_name.capitalize(), 'ie_key': service_name.capitalize(),
'title': video_data.get('title'), 'title': video_data.get('title'),
}) })

@ -12,25 +12,16 @@ from ..utils import (
class TeleQuebecBaseIE(InfoExtractor): class TeleQuebecBaseIE(InfoExtractor):
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
@staticmethod @staticmethod
def _result(url, ie_key): def _brightcove_result(brightcove_id, player_id, account_id='6150020952001'):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': smuggle_url(url, {'geo_countries': ['CA']}), 'url': smuggle_url(TeleQuebecBaseIE.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, brightcove_id), {'geo_countries': ['CA']}),
'ie_key': ie_key, 'ie_key': 'BrightcoveNew',
} }
@staticmethod
def _limelight_result(media_id):
return TeleQuebecBaseIE._result(
'limelight:media:' + media_id, 'LimelightMedia')
@staticmethod
def _brightcove_result(brightcove_id):
return TeleQuebecBaseIE._result(
'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s'
% brightcove_id, 'BrightcoveNew')
class TeleQuebecIE(TeleQuebecBaseIE): class TeleQuebecIE(TeleQuebecBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -44,14 +35,18 @@ class TeleQuebecIE(TeleQuebecBaseIE):
# available till 01.01.2023 # available till 01.01.2023
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane', 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
'info_dict': { 'info_dict': {
'id': '577116881b4b439084e6b1cf4ef8b1b3', 'id': '6155972771001',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Un petit choc et puis repart!', 'title': 'Un petit choc et puis repart!',
'description': 'md5:067bc84bd6afecad85e69d1000730907', 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
'timestamp': 1589262469,
'uploader_id': '6150020952001',
'upload_date': '20200512',
}, },
'params': { 'params': {
'skip_download': True, 'format': 'bestvideo',
}, },
'add_ie': ['BrightcoveNew'],
}, { }, {
'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout', 'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
'info_dict': { 'info_dict': {
@ -65,7 +60,6 @@ class TeleQuebecIE(TeleQuebecBaseIE):
}, },
'params': { 'params': {
'format': 'bestvideo', 'format': 'bestvideo',
'skip_download': True,
}, },
'add_ie': ['BrightcoveNew'], 'add_ie': ['BrightcoveNew'],
}, { }, {
@ -79,25 +73,20 @@ class TeleQuebecIE(TeleQuebecBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
media_id = self._match_id(url) media_id = self._match_id(url)
media = self._download_json(
media_data = self._download_json( 'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id,
'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
media_id)['media'] media_id)['media']
source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove')
source_id = media_data['streamInfo']['sourceId'] info = self._brightcove_result(source_id, '22gPKdt7f')
source = (try_get( product = media.get('product') or {}
media_data, lambda x: x['streamInfo']['source'], season = product.get('season') or {}
compat_str) or 'limelight').lower()
if source == 'brightcove':
info = self._brightcove_result(source_id)
else:
info = self._limelight_result(source_id)
info.update({ info.update({
'title': media_data.get('title'), 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str),
'description': try_get( 'series': try_get(season, lambda x: x['serie']['titre']),
media_data, lambda x: x['descriptions'][0]['text'], compat_str), 'season': season.get('name'),
'duration': int_or_none( 'season_number': int_or_none(season.get('seasonNo')),
media_data.get('durationInMilliseconds'), 1000), 'episode': product.get('titre'),
'episode_number': int_or_none(product.get('episodeNo')),
}) })
return info return info
@ -148,7 +137,7 @@ class TeleQuebecSquatIE(InfoExtractor):
} }
class TeleQuebecEmissionIE(TeleQuebecBaseIE): class TeleQuebecEmissionIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
@ -160,15 +149,16 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
_TESTS = [{ _TESTS = [{
'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente', 'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
'info_dict': { 'info_dict': {
'id': '66648a6aef914fe3badda25e81a4d50a', 'id': '6154476028001',
'ext': 'mp4', 'ext': 'mp4',
'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?", 'title': 'Des soins esthétiques à 377 % dintérêts annuels, ça vous tente?',
'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014', 'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f',
'upload_date': '20171024', 'upload_date': '20200505',
'timestamp': 1508862118, 'timestamp': 1588713424,
'uploader_id': '6150020952001',
}, },
'params': { 'params': {
'skip_download': True, 'format': 'bestvideo',
}, },
}, { }, {
'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression', 'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
@ -187,26 +177,26 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
media_id = self._search_regex( media_id = self._search_regex(
r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage, r'mediaId\s*:\s*(?P<id>\d+)', webpage, 'media id')
'limelight id')
info = self._limelight_result(media_id) return self.url_result(
info.update({ 'http://zonevideo.telequebec.tv/media/' + media_id,
'title': self._og_search_title(webpage, default=None), TeleQuebecIE.ie_key())
'description': self._og_search_description(webpage, default=None),
})
return info
class TeleQuebecLiveIE(InfoExtractor): class TeleQuebecLiveIE(TeleQuebecBaseIE):
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)' _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
_TEST = { _TEST = {
'url': 'http://zonevideo.telequebec.tv/endirect/', 'url': 'http://zonevideo.telequebec.tv/endirect/',
'info_dict': { 'info_dict': {
'id': 'endirect', 'id': '6159095684001',
'ext': 'mp4', 'ext': 'mp4',
'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True, 'is_live': True,
'description': 'Canal principal de Télé-Québec',
'uploader_id': '6150020952001',
'timestamp': 1590439901,
'upload_date': '20200525',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -214,25 +204,49 @@ class TeleQuebecLiveIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) return self._brightcove_result('6159095684001', 'skCsmi2Uw')
m3u8_url = None
webpage = self._download_webpage(
'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
fatal=False)
if webpage:
m3u8_url = self._search_regex(
r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'm3u8 url', default=None, group='url')
if not m3u8_url:
m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', m3u8_id='hls')
self._sort_formats(formats)
return { class TeleQuebecVideoIE(TeleQuebecBaseIE):
'id': video_id, _VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P<id>\d+)'
'title': self._live_title('Télé-Québec - En direct'), _TESTS = [{
'is_live': True, 'url': 'https://video.telequebec.tv/player/31110/stream',
'formats': formats, 'info_dict': {
} 'id': '6202570652001',
'ext': 'mp4',
'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée',
'description': 'md5:685a7e4c450ba777c60adb6e71e41526',
'upload_date': '20201019',
'timestamp': 1603115930,
'uploader_id': '6101674910001',
},
'params': {
'format': 'bestvideo',
},
}, {
'url': 'https://video.telequebec.tv/player-live/28527',
'only_matching': True,
}]
def _call_api(self, path, video_id):
    """Query the telequebec Brightcove beacon assets endpoint.

    ``path`` is appended to the assets URL and ``video_id`` is forwarded to
    ``_download_json``. Only the ``data`` member of the decoded response is
    returned.
    """
    api_url = 'http://beacon.playback.api.brightcove.com/telequebec/api/assets/' + path
    device_query = {'device_layout': 'web', 'device_type': 'web'}
    response = self._download_json(api_url, video_id, query=device_query)
    return response['data']
def _real_extract(self, url):
    """Build the extraction result for the asset id matched from ``url``.

    The asset record is fetched first, then the record of its first listed
    stream. The stream URL is handed to ``_brightcove_result`` and the
    returned info is completed with description/series/episode metadata
    taken from the asset record.
    """
    asset_id = self._match_id(url)
    asset = self._call_api(asset_id, asset_id)['asset']

    # Only the first entry of the asset's stream list is used.
    first_stream_id = asset['streams'][0]['id']
    stream_path = asset_id + '/streams/' + first_stream_id
    stream = self._call_api(stream_path, asset_id)['stream']
    stream_url = stream['url']

    account_id = try_get(
        stream, lambda x: x['video_provider_details']['account_id'])
    if not account_id:
        # Fall back to the hard-coded account id when the stream has none.
        account_id = '6101674910001'

    info = self._brightcove_result(stream_url, 'default', account_id)

    description = asset.get('long_description')
    if not description:
        description = asset.get('short_description')
    extra_metadata = {
        'description': description,
        'series': asset.get('series_original_name'),
        'season_number': int_or_none(asset.get('season_number')),
        'episode': asset.get('original_name'),
        'episode_number': int_or_none(asset.get('episode_number')),
    }
    info.update(extra_metadata)
    return info

@ -33,6 +33,19 @@ class TubiTvIE(InfoExtractor):
}, { }, {
'url': 'http://tubitv.com/movies/383676/tracker', 'url': 'http://tubitv.com/movies/383676/tracker',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://tubitv.com/movies/560057/penitentiary?start=true',
'info_dict': {
'id': '560057',
'ext': 'mp4',
'title': 'Penitentiary',
'description': 'md5:8d2fc793a93cc1575ff426fdcb8dd3f9',
'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2',
'release_year': 1979,
},
'params': {
'skip_download': True,
},
}] }]
def _login(self): def _login(self):
@ -93,4 +106,5 @@ class TubiTvIE(InfoExtractor):
'description': video_data.get('description'), 'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')), 'duration': int_or_none(video_data.get('duration')),
'uploader_id': video_data.get('publisher_id'), 'uploader_id': video_data.get('publisher_id'),
'release_year': int_or_none(video_data.get('year')),
} }

@ -12,11 +12,13 @@ from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_duration,
parse_iso8601, parse_iso8601,
qualities, qualities,
try_get, try_get,
update_url_query, update_url_query,
url_or_none, url_or_none,
urljoin,
) )
@ -414,7 +416,7 @@ class ViafreeIE(InfoExtractor):
class TVPlayHomeIE(InfoExtractor): class TVPlayHomeIE(InfoExtractor):
_VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)' _VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/', 'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
'info_dict': { 'info_dict': {
@ -433,80 +435,58 @@ class TVPlayHomeIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'add_ie': [TVPlayIE.ie_key()],
}, { }, {
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/', 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/', 'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://play.tv3.lt/aferistai-10047125',
'only_matching': True,
}, {
'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317',
'only_matching': True,
}, {
'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) asset = self._download_json(
urljoin(url, '/sb/public/asset/' + video_id), video_id)
video_id = self._search_regex(
r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
if len(video_id) < 8:
return self.url_result(
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
m3u8_url = self._search_regex( m3u8_url = asset['movie']['contentUrl']
r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, video_id = asset['assetId']
'm3u8 url', group='url') asset_title = asset['title']
title = asset_title['title']
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
m3u8_id='hls')
self._sort_formats(formats) self._sort_formats(formats)
title = self._search_regex( thumbnails = None
r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, image_url = asset.get('imageUrl')
'title', default=None, group='value') or self._html_search_meta( if image_url:
'title', webpage, default=None) or self._og_search_title( thumbnails = [{
webpage) 'url': urljoin(url, image_url),
'ext': 'jpg',
description = self._html_search_meta( }]
'description', webpage,
default=None) or self._og_search_description(webpage)
thumbnail = self._search_regex(
r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'thumbnail', default=None, group='url') or self._html_search_meta(
'thumbnail', webpage, default=None) or self._og_search_thumbnail(
webpage)
duration = int_or_none(self._search_regex(
r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
fatal=False))
season = self._search_regex( metadata = asset.get('metadata') or {}
(r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'season', default=None, group='value')
season_number = int_or_none(self._search_regex(
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
default=None))
episode = self._search_regex(
(r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'episode', default=None, group='value')
episode_number = int_or_none(self._search_regex(
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
default=None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'),
'thumbnail': thumbnail, 'thumbnails': thumbnails,
'duration': duration, 'duration': parse_duration(asset_title.get('runTime')),
'season': season, 'series': asset.get('tvSeriesTitle'),
'season_number': season_number, 'season': asset.get('tvSeasonTitle'),
'episode': episode, 'season_number': int_or_none(metadata.get('seasonNumber')),
'episode_number': episode_number, 'episode': asset_title.get('titleBrief'),
'episode_number': int_or_none(metadata.get('episodeNumber')),
'formats': formats, 'formats': formats,
} }

@ -74,11 +74,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
_YOUTUBE_CLIENT_HEADERS = {
'x-youtube-client-name': '1',
'x-youtube-client-version': '1.20200609.04.02',
}
def _set_language(self): def _set_language(self):
self._set_cookie( self._set_cookie(
'.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en', '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
@ -2796,6 +2791,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# no longer available? # no longer available?
'url': 'https://www.youtube.com/feed/recommended', 'url': 'https://www.youtube.com/feed/recommended',
'only_matching': True, 'only_matching': True,
}, {
# inline playlist with not always working continuations
'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
'only_matching': True,
} }
# TODO # TODO
# { # {
@ -2996,6 +2995,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
for entry in self._post_thread_entries(renderer): for entry in self._post_thread_entries(renderer):
yield entry yield entry
@staticmethod
def _build_continuation_query(continuation, ctp=None):
query = {
'ctoken': continuation,
'continuation': continuation,
}
if ctp:
query['itct'] = ctp
return query
@staticmethod @staticmethod
def _extract_next_continuation_data(renderer): def _extract_next_continuation_data(renderer):
next_continuation = try_get( next_continuation = try_get(
@ -3006,11 +3015,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if not continuation: if not continuation:
return return
ctp = next_continuation.get('clickTrackingParams') ctp = next_continuation.get('clickTrackingParams')
return { return YoutubeTabIE._build_continuation_query(continuation, ctp)
'ctoken': continuation,
'continuation': continuation,
'itct': ctp,
}
@classmethod @classmethod
def _extract_continuation(cls, renderer): def _extract_continuation(cls, renderer):
@ -3033,13 +3038,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if not continuation: if not continuation:
continue continue
ctp = continuation_ep.get('clickTrackingParams') ctp = continuation_ep.get('clickTrackingParams')
if not ctp: return YoutubeTabIE._build_continuation_query(continuation, ctp)
continue
return {
'ctoken': continuation,
'continuation': continuation,
'itct': ctp,
}
def _entries(self, tab, identity_token): def _entries(self, tab, identity_token):
@ -3232,16 +3231,29 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
playlist.update(self._extract_uploader(data)) playlist.update(self._extract_uploader(data))
return playlist return playlist
def _extract_from_playlist(self, item_id, data, playlist): def _extract_from_playlist(self, item_id, url, data, playlist):
title = playlist.get('title') or try_get( title = playlist.get('title') or try_get(
data, lambda x: x['titleText']['simpleText'], compat_str) data, lambda x: x['titleText']['simpleText'], compat_str)
playlist_id = playlist.get('playlistId') or item_id playlist_id = playlist.get('playlistId') or item_id
# Inline playlist rendition continuation does not always work
# at Youtube side, so delegating regular tab-based playlist URL
# processing whenever possible.
playlist_url = urljoin(url, try_get(
playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
compat_str))
if playlist_url and playlist_url != url:
return self.url_result(
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
video_title=title)
return self.playlist_result( return self.playlist_result(
self._playlist_entries(playlist), playlist_id=playlist_id, self._playlist_entries(playlist), playlist_id=playlist_id,
playlist_title=title) playlist_title=title)
def _extract_alerts(self, data): @staticmethod
def _extract_alerts(data):
for alert_dict in try_get(data, lambda x: x['alerts'], list) or []: for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
if not isinstance(alert_dict, dict):
continue
for renderer in alert_dict: for renderer in alert_dict:
alert = alert_dict[renderer] alert = alert_dict[renderer]
alert_type = alert.get('type') alert_type = alert.get('type')
@ -3255,6 +3267,19 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if message: if message:
yield alert_type, message yield alert_type, message
def _extract_identity_token(self, webpage, item_id):
    """Look up the identity token embedded in ``webpage``.

    The ``ID_TOKEN`` entry of the ``ytcfg.set(...)`` JSON payload is tried
    first. When that yields no usable string, the raw page is scanned for
    an ``ID_TOKEN`` key/value pair instead (with ``default=None``).
    """
    ytcfg_source = self._search_regex(
        r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
        default='{}')
    # Non-fatal parse: a malformed payload just falls through to the regex.
    ytcfg = self._parse_json(ytcfg_source, item_id, fatal=False)

    token = None
    if ytcfg:
        token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
    if token:
        return token

    return self._search_regex(
        r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
        'identity token', default=None)
def _real_extract(self, url): def _real_extract(self, url):
item_id = self._match_id(url) item_id = self._match_id(url)
url = compat_urlparse.urlunparse( url = compat_urlparse.urlunparse(
@ -3285,9 +3310,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, item_id) webpage = self._download_webpage(url, item_id)
identity_token = self._search_regex( identity_token = self._extract_identity_token(webpage, item_id)
r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
'identity token', default=None)
data = self._extract_yt_initial_data(item_id, webpage) data = self._extract_yt_initial_data(item_id, webpage)
for alert_type, alert_message in self._extract_alerts(data): for alert_type, alert_message in self._extract_alerts(data):
self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message)) self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
@ -3298,7 +3321,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
playlist = try_get( playlist = try_get(
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
if playlist: if playlist:
return self._extract_from_playlist(item_id, data, playlist) return self._extract_from_playlist(item_id, url, data, playlist)
# Fallback to video extraction if no playlist alike page is recognized. # Fallback to video extraction if no playlist alike page is recognized.
# First check for the current video then try the v attribute of URL query. # First check for the current video then try the v attribute of URL query.
video_id = try_get( video_id = try_get(

Loading…
Cancel
Save