diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 3931de67d..310457f53 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -430,7 +430,8 @@
- **la7.it**
- **laola1tv**
- **laola1tv:embed**
- - **lbry.tv**
+ - **lbry**
+ - **lbry:channel**
- **LCI**
- **Lcp**
- **LcpPlay**
@@ -911,6 +912,7 @@
- **TeleQuebecEmission**
- **TeleQuebecLive**
- **TeleQuebecSquat**
+ - **TeleQuebecVideo**
- **TeleTask**
- **Telewebion**
- **TennisTV**
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index bdd01e41a..9ab9ba7f9 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -108,6 +108,18 @@ class TestInfoExtractor(unittest.TestCase):
self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
def test_parse_html5_media_entries(self):
+ # inline video tag
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://127.0.0.1/video.html',
+ r'<html><video src="/vid.mp4" /></html>', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://127.0.0.1/vid.mp4',
+ }],
+ })
+
# from https://www.r18.com/
# with kpbs in label
expect_dict(
diff --git a/youtube_dlc/extractor/amcnetworks.py b/youtube_dlc/extractor/amcnetworks.py
index 6fb3d6c53..12b6de0bf 100644
--- a/youtube_dlc/extractor/amcnetworks.py
+++ b/youtube_dlc/extractor/amcnetworks.py
@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .theplatform import ThePlatformIE
from ..utils import (
int_or_none,
@@ -11,25 +13,22 @@ from ..utils import (
class AMCNetworksIE(ThePlatformIE):
- _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
_TESTS = [{
- 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
- 'md5': '',
+ 'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
'info_dict': {
- 'id': 's3MX01Nl4vPH',
+ 'id': '4Lq1dzOnZGt0',
'ext': 'mp4',
- 'title': 'Maron - Season 4 - Step 1',
- 'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
- 'age_limit': 17,
- 'upload_date': '20160505',
- 'timestamp': 1462468831,
+ 'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
+ 'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
+ 'upload_date': '20201120',
+ 'timestamp': 1605904350,
'uploader': 'AMCN',
},
'params': {
# m3u8 download
'skip_download': True,
},
- 'skip': 'Requires TV provider accounts',
}, {
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
'only_matching': True,
@@ -55,32 +54,33 @@ class AMCNetworksIE(ThePlatformIE):
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
'only_matching': True,
}]
+ _REQUESTOR_ID_MAP = {
+ 'amc': 'AMC',
+ 'bbcamerica': 'BBCA',
+ 'ifc': 'IFC',
+ 'sundancetv': 'SUNDANCE',
+ 'wetv': 'WETV',
+ }
def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ site, display_id = re.match(self._VALID_URL, url).groups()
+ requestor_id = self._REQUESTOR_ID_MAP[site]
+ properties = self._download_json(
+ 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
+ display_id)['data']['properties']
query = {
'mbr': 'true',
'manifest': 'm3u',
}
- media_url = self._search_regex(
- r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
- webpage, 'media url')
- theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
- r'link\.theplatform\.com/s/([^?]+)',
- media_url, 'theplatform_path'), display_id)
+ tp_path = 'M_UwQC/media/' + properties['videoPid']
+ media_url = 'https://link.theplatform.com/s/' + tp_path
+ theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
info = self._parse_theplatform_metadata(theplatform_metadata)
video_id = theplatform_metadata['pid']
title = theplatform_metadata['title']
rating = try_get(
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
- auth_required = self._search_regex(
- r'window\.authRequired\s*=\s*(true|false);',
- webpage, 'auth required')
- if auth_required == 'true':
- requestor_id = self._search_regex(
- r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
- webpage, 'requestor id')
+ if properties.get('videoCategory') == 'TVE-Auth':
resource = self._get_mvpd_resource(
requestor_id, title, video_id, rating)
query['auth'] = self._extract_mvpd_auth(
diff --git a/youtube_dlc/extractor/americastestkitchen.py b/youtube_dlc/extractor/americastestkitchen.py
index 9c9d77ae1..e20f00fc3 100644
--- a/youtube_dlc/extractor/americastestkitchen.py
+++ b/youtube_dlc/extractor/americastestkitchen.py
@@ -1,33 +1,33 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import (
clean_html,
- int_or_none,
- js_to_json,
try_get,
unified_strdate,
)
class AmericasTestKitchenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
'md5': 'b861c3e365ac38ad319cfd509c30577f',
'info_dict': {
'id': '5b400b9ee338f922cb06450c',
- 'title': 'Weeknight Japanese Suppers',
+ 'title': 'Japanese Suppers',
'ext': 'mp4',
- 'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
+ 'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://',
'timestamp': 1523664000,
'upload_date': '20180414',
- 'release_date': '20180414',
+ 'release_date': '20180410',
'series': "America's Test Kitchen",
'season_number': 18,
- 'episode': 'Weeknight Japanese Suppers',
+ 'episode': 'Japanese Suppers',
'episode_number': 15,
},
'params': {
@@ -36,47 +36,31 @@ class AmericasTestKitchenIE(InfoExtractor):
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
+ }, {
+ 'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
+ resource_type, video_id = re.match(self._VALID_URL, url).groups()
+ is_episode = resource_type == 'episode'
+ if is_episode:
+ resource_type = 'episodes'
- video_data = self._parse_json(
- self._search_regex(
- r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*',
- webpage, 'initial context'),
- video_id, js_to_json)
-
- ep_data = try_get(
- video_data,
- (lambda x: x['episodeDetail']['content']['data'],
- lambda x: x['videoDetail']['content']['data']), dict)
- ep_meta = ep_data.get('full_video', {})
-
- zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
-
- title = ep_data.get('title') or ep_meta.get('title')
- description = clean_html(ep_meta.get('episode_description') or ep_data.get(
- 'description') or ep_meta.get('description'))
- thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
- release_date = unified_strdate(ep_data.get('aired_at'))
-
- season_number = int_or_none(ep_meta.get('season_number'))
- episode = ep_meta.get('title')
- episode_number = int_or_none(ep_meta.get('episode_number'))
+ resource = self._download_json(
+ 'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
+ video = resource['video'] if is_episode else resource
+ episode = resource if is_episode else resource.get('episode') or {}
return {
'_type': 'url_transparent',
- 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
+ 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'ie_key': 'Zype',
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'release_date': release_date,
- 'series': "America's Test Kitchen",
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
+ 'description': clean_html(video.get('description')),
+ 'release_date': unified_strdate(video.get('publishDate')),
+ 'series': try_get(episode, lambda x: x['show']['title']),
+ 'episode': episode.get('title'),
}
diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py
index 44b914247..5bcdf2dc0 100644
--- a/youtube_dlc/extractor/common.py
+++ b/youtube_dlc/extractor/common.py
@@ -2516,9 +2516,9 @@ class InfoExtractor(object):
# https://www.ampproject.org/docs/reference/components/amp-video)
# For dl8-* tags see https://delight-vr.com/documentation/dl8-video/
_MEDIA_TAG_NAME_RE = r'(?:(?:amp|dl8(?:-live)?)-)?(video|audio)'
- media_tags = [(media_tag, media_type, '')
- for media_tag, media_type
- in re.findall(r'(?s)(<%s[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
+ media_tags = [(media_tag, media_tag_name, media_type, '')
+ for media_tag, media_tag_name, media_type
+ in re.findall(r'(?s)(<(%s)[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)]
media_tags.extend(re.findall(
# We only allow video|audio followed by a whitespace or '>'.
# Allowing more characters may end up in significant slow down (see
diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py
index f49b998fa..f4135a6dd 100644
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@@ -555,7 +555,10 @@ from .laola1tv import (
EHFTVIE,
ITTFIE,
)
-from .lbry import LBRYIE
+from .lbry import (
+ LBRYIE,
+ LBRYChannelIE,
+)
from .lci import LCIIE
from .lcp import (
LcpPlayIE,
@@ -1192,6 +1195,7 @@ from .telequebec import (
TeleQuebecSquatIE,
TeleQuebecEmissionIE,
TeleQuebecLiveIE,
+ TeleQuebecVideoIE,
)
from .teletask import TeleTaskIE
from .telewebion import TelewebionIE
diff --git a/youtube_dlc/extractor/generic.py b/youtube_dlc/extractor/generic.py
index 83cf204cb..91b13a4a9 100644
--- a/youtube_dlc/extractor/generic.py
+++ b/youtube_dlc/extractor/generic.py
@@ -2114,23 +2114,23 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
- {
- # Zype embed
- 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
- 'info_dict': {
- 'id': '5b400b834b32992a310622b9',
- 'ext': 'mp4',
- 'title': 'Smoky Barbecue Favorites',
- 'thumbnail': r're:^https?://.*\.jpe?g',
- 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
- 'upload_date': '20170909',
- 'timestamp': 1504915200,
- },
- 'add_ie': [ZypeIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
+ # {
+ # # Zype embed
+ # 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
+ # 'info_dict': {
+ # 'id': '5b400b834b32992a310622b9',
+ # 'ext': 'mp4',
+ # 'title': 'Smoky Barbecue Favorites',
+ # 'thumbnail': r're:^https?://.*\.jpe?g',
+ # 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ # 'upload_date': '20170909',
+ # 'timestamp': 1504915200,
+ # },
+ # 'add_ie': [ZypeIE.ie_key()],
+ # 'params': {
+ # 'skip_download': True,
+ # },
+ # },
{
# videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
diff --git a/youtube_dlc/extractor/lbry.py b/youtube_dlc/extractor/lbry.py
index 6177297ab..41cc245eb 100644
--- a/youtube_dlc/extractor/lbry.py
+++ b/youtube_dlc/extractor/lbry.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import functools
import json
from .common import InfoExtractor
@@ -10,13 +11,73 @@ from ..utils import (
ExtractorError,
int_or_none,
mimetype2ext,
+ OnDemandPagedList,
try_get,
+ urljoin,
)
-class LBRYIE(InfoExtractor):
- IE_NAME = 'lbry.tv'
- _VALID_URL = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/(?P<id>@[^:]+:[0-9a-z]+/[^:]+:[0-9a-z])'
+class LBRYBaseIE(InfoExtractor):
+ _BASE_URL_REGEX = r'https?://(?:www\.)?(?:lbry\.tv|odysee\.com)/'
+ _CLAIM_ID_REGEX = r'[0-9a-f]{1,40}'
+ _OPT_CLAIM_ID = '[^:/?#&]+(?::%s)?' % _CLAIM_ID_REGEX
+ _SUPPORTED_STREAM_TYPES = ['video', 'audio']
+
+ def _call_api_proxy(self, method, display_id, params, resource):
+ return self._download_json(
+ 'https://api.lbry.tv/api/v1/proxy',
+ display_id, 'Downloading %s JSON metadata' % resource,
+ headers={'Content-Type': 'application/json-rpc'},
+ data=json.dumps({
+ 'method': method,
+ 'params': params,
+ }).encode())['result']
+
+ def _resolve_url(self, url, display_id, resource):
+ return self._call_api_proxy(
+ 'resolve', display_id, {'urls': url}, resource)[url]
+
+ def _permanent_url(self, url, claim_name, claim_id):
+ return urljoin(url, '/%s:%s' % (claim_name, claim_id))
+
+ def _parse_stream(self, stream, url):
+ stream_value = stream.get('value') or {}
+ stream_type = stream_value.get('stream_type')
+ source = stream_value.get('source') or {}
+ media = stream_value.get(stream_type) or {}
+ signing_channel = stream.get('signing_channel') or {}
+ channel_name = signing_channel.get('name')
+ channel_claim_id = signing_channel.get('claim_id')
+ channel_url = None
+ if channel_name and channel_claim_id:
+ channel_url = self._permanent_url(url, channel_name, channel_claim_id)
+
+ info = {
+ 'thumbnail': try_get(stream_value, lambda x: x['thumbnail']['url'], compat_str),
+ 'description': stream_value.get('description'),
+ 'license': stream_value.get('license'),
+ 'timestamp': int_or_none(stream.get('timestamp')),
+ 'tags': stream_value.get('tags'),
+ 'duration': int_or_none(media.get('duration')),
+ 'channel': try_get(signing_channel, lambda x: x['value']['title']),
+ 'channel_id': channel_claim_id,
+ 'channel_url': channel_url,
+ 'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
+ 'filesize': int_or_none(source.get('size')),
+ }
+ if stream_type == 'audio':
+ info['vcodec'] = 'none'
+ else:
+ info.update({
+ 'width': int_or_none(media.get('width')),
+ 'height': int_or_none(media.get('height')),
+ })
+ return info
+
+
+class LBRYIE(LBRYBaseIE):
+ IE_NAME = 'lbry'
+ _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>\$/[^/]+/[^/]+/{1}|@{0}/{0}|(?!@){0})'.format(LBRYBaseIE._OPT_CLAIM_ID, LBRYBaseIE._CLAIM_ID_REGEX)
_TESTS = [{
# Video
'url': 'https://lbry.tv/@Mantega:1/First-day-LBRY:1',
@@ -28,6 +89,8 @@ class LBRYIE(InfoExtractor):
'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
'timestamp': 1595694354,
'upload_date': '20200725',
+ 'width': 1280,
+ 'height': 720,
}
}, {
# Audio
@@ -40,6 +103,12 @@ class LBRYIE(InfoExtractor):
'description': 'md5:661ac4f1db09f31728931d7b88807a61',
'timestamp': 1591312601,
'upload_date': '20200604',
+ 'tags': list,
+ 'duration': 2570,
+ 'channel': 'The LBRY Foundation',
+ 'channel_id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'channel_url': 'https://lbry.tv/@LBRYFoundation:0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'vcodec': 'none',
}
}, {
'url': 'https://odysee.com/@BrodieRobertson:5/apple-is-tracking-everything-you-do-on:e',
@@ -47,45 +116,99 @@ class LBRYIE(InfoExtractor):
}, {
'url': "https://odysee.com/@ScammerRevolts:b0/I-SYSKEY'D-THE-SAME-SCAMMERS-3-TIMES!:b",
'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/Episode-1:e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/$/embed/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/Episode-1:e7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/@LBRYFoundation/Episode-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
+ 'only_matching': True,
}]
- def _call_api_proxy(self, method, display_id, params):
- return self._download_json(
- 'https://api.lbry.tv/api/v1/proxy', display_id,
- headers={'Content-Type': 'application/json-rpc'},
- data=json.dumps({
- 'method': method,
- 'params': params,
- }).encode())['result']
-
def _real_extract(self, url):
- display_id = self._match_id(url).replace(':', '#')
+ display_id = self._match_id(url)
+ if display_id.startswith('$/'):
+ display_id = display_id.split('/', 2)[-1].replace('/', ':')
+ else:
+ display_id = display_id.replace(':', '#')
uri = 'lbry://' + display_id
- result = self._call_api_proxy(
- 'resolve', display_id, {'urls': [uri]})[uri]
+ result = self._resolve_url(uri, display_id, 'stream')
result_value = result['value']
- if result_value.get('stream_type') not in ('video', 'audio'):
+ if result_value.get('stream_type') not in self._SUPPORTED_STREAM_TYPES:
raise ExtractorError('Unsupported URL', expected=True)
+ claim_id = result['claim_id']
+ title = result_value['title']
streaming_url = self._call_api_proxy(
- 'get', display_id, {'uri': uri})['streaming_url']
- source = result_value.get('source') or {}
- media = result_value.get('video') or result_value.get('audio') or {}
- signing_channel = result_value.get('signing_channel') or {}
-
- return {
- 'id': result['claim_id'],
- 'title': result_value['title'],
- 'thumbnail': try_get(result_value, lambda x: x['thumbnail']['url'], compat_str),
- 'description': result_value.get('description'),
- 'license': result_value.get('license'),
- 'timestamp': int_or_none(result.get('timestamp')),
- 'tags': result_value.get('tags'),
- 'width': int_or_none(media.get('width')),
- 'height': int_or_none(media.get('height')),
- 'duration': int_or_none(media.get('duration')),
- 'channel': signing_channel.get('name'),
- 'channel_id': signing_channel.get('claim_id'),
- 'ext': determine_ext(source.get('name')) or mimetype2ext(source.get('media_type')),
- 'filesize': int_or_none(source.get('size')),
+ 'get', claim_id, {'uri': uri}, 'streaming url')['streaming_url']
+ info = self._parse_stream(result, url)
+ info.update({
+ 'id': claim_id,
+ 'title': title,
'url': streaming_url,
- }
+ })
+ return info
+
+
+class LBRYChannelIE(LBRYBaseIE):
+ IE_NAME = 'lbry:channel'
+ _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'(?P<id>@%s)/?(?:[?#&]|$)' % LBRYBaseIE._OPT_CLAIM_ID
+ _TESTS = [{
+ 'url': 'https://lbry.tv/@LBRYFoundation:0',
+ 'info_dict': {
+ 'id': '0ed629d2b9c601300cacf7eabe9da0be79010212',
+ 'title': 'The LBRY Foundation',
+ 'description': 'Channel for the LBRY Foundation. Follow for updates and news.',
+ },
+ 'playlist_count': 29,
+ }, {
+ 'url': 'https://lbry.tv/@LBRYFoundation',
+ 'only_matching': True,
+ }]
+ _PAGE_SIZE = 50
+
+ def _fetch_page(self, claim_id, url, page):
+ page += 1
+ result = self._call_api_proxy(
+ 'claim_search', claim_id, {
+ 'channel_ids': [claim_id],
+ 'claim_type': 'stream',
+ 'no_totals': True,
+ 'page': page,
+ 'page_size': self._PAGE_SIZE,
+ 'stream_types': self._SUPPORTED_STREAM_TYPES,
+ }, 'page %d' % page)
+ for item in (result.get('items') or []):
+ stream_claim_name = item.get('name')
+ stream_claim_id = item.get('claim_id')
+ if not (stream_claim_name and stream_claim_id):
+ continue
+
+ info = self._parse_stream(item, url)
+ info.update({
+ '_type': 'url',
+ 'id': stream_claim_id,
+ 'title': try_get(item, lambda x: x['value']['title']),
+ 'url': self._permanent_url(url, stream_claim_name, stream_claim_id),
+ })
+ yield info
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url).replace(':', '#')
+ result = self._resolve_url(
+ 'lbry://' + display_id, display_id, 'channel')
+ claim_id = result['claim_id']
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, claim_id, url),
+ self._PAGE_SIZE)
+ result_value = result.get('value') or {}
+ return self.playlist_result(
+ entries, claim_id, result_value.get('title'),
+ result_value.get('description'))
diff --git a/youtube_dlc/extractor/slideslive.py b/youtube_dlc/extractor/slideslive.py
index d9ea76831..cd70841a9 100644
--- a/youtube_dlc/extractor/slideslive.py
+++ b/youtube_dlc/extractor/slideslive.py
@@ -2,7 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import smuggle_url
+from ..utils import (
+ bool_or_none,
+ smuggle_url,
+ try_get,
+ url_or_none,
+)
class SlidesLiveIE(InfoExtractor):
@@ -18,8 +23,21 @@ class SlidesLiveIE(InfoExtractor):
'description': 'Watch full version of this video at https://slideslive.com/38902413.',
'uploader': 'SlidesLive Videos - A',
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
+ 'timestamp': 1597615266,
'upload_date': '20170925',
}
+ }, {
+ # video_service_name = yoda
+ 'url': 'https://slideslive.com/38935785',
+ 'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
+ 'info_dict': {
+ 'id': 'RMraDYN5ozA_',
+ 'ext': 'mp4',
+ 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
}, {
# video_service_name = youtube
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
@@ -39,18 +57,47 @@ class SlidesLiveIE(InfoExtractor):
video_data = self._download_json(
'https://ben.slideslive.com/player/' + video_id, video_id)
service_name = video_data['video_service_name'].lower()
- assert service_name in ('url', 'vimeo', 'youtube')
+ assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
service_id = video_data['video_service_id']
+ subtitles = {}
+ for sub in try_get(video_data, lambda x: x['subtitles'], list) or []:
+ if not isinstance(sub, dict):
+ continue
+ webvtt_url = url_or_none(sub.get('webvtt_url'))
+ if not webvtt_url:
+ continue
+ lang = sub.get('language') or 'en'
+ subtitles.setdefault(lang, []).append({
+ 'url': webvtt_url,
+ })
info = {
'id': video_id,
'thumbnail': video_data.get('thumbnail'),
- 'url': service_id,
+ 'is_live': bool_or_none(video_data.get('is_live')),
+ 'subtitles': subtitles,
}
- if service_name == 'url':
+ if service_name in ('url', 'yoda'):
info['title'] = video_data['title']
+ if service_name == 'url':
+ info['url'] = service_id
+ else:
+ formats = []
+ _MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
+ formats.extend(self._extract_m3u8_formats(
+ _MANIFEST_PATTERN % (service_id, 'm3u8'), service_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ _MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
+ mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+ info.update({
+ 'id': service_id,
+ 'formats': formats,
+ })
else:
info.update({
'_type': 'url_transparent',
+ 'url': service_id,
'ie_key': service_name.capitalize(),
'title': video_data.get('title'),
})
diff --git a/youtube_dlc/extractor/telequebec.py b/youtube_dlc/extractor/telequebec.py
index b4c485b9b..800d87b70 100644
--- a/youtube_dlc/extractor/telequebec.py
+++ b/youtube_dlc/extractor/telequebec.py
@@ -12,25 +12,16 @@ from ..utils import (
class TeleQuebecBaseIE(InfoExtractor):
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
+
@staticmethod
- def _result(url, ie_key):
+ def _brightcove_result(brightcove_id, player_id, account_id='6150020952001'):
return {
'_type': 'url_transparent',
- 'url': smuggle_url(url, {'geo_countries': ['CA']}),
- 'ie_key': ie_key,
+ 'url': smuggle_url(TeleQuebecBaseIE.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, brightcove_id), {'geo_countries': ['CA']}),
+ 'ie_key': 'BrightcoveNew',
}
- @staticmethod
- def _limelight_result(media_id):
- return TeleQuebecBaseIE._result(
- 'limelight:media:' + media_id, 'LimelightMedia')
-
- @staticmethod
- def _brightcove_result(brightcove_id):
- return TeleQuebecBaseIE._result(
- 'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s'
- % brightcove_id, 'BrightcoveNew')
-
class TeleQuebecIE(TeleQuebecBaseIE):
_VALID_URL = r'''(?x)
@@ -44,14 +35,18 @@ class TeleQuebecIE(TeleQuebecBaseIE):
# available till 01.01.2023
'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
'info_dict': {
- 'id': '577116881b4b439084e6b1cf4ef8b1b3',
+ 'id': '6155972771001',
'ext': 'mp4',
'title': 'Un petit choc et puis repart!',
- 'description': 'md5:067bc84bd6afecad85e69d1000730907',
+ 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
+ 'timestamp': 1589262469,
+ 'uploader_id': '6150020952001',
+ 'upload_date': '20200512',
},
'params': {
- 'skip_download': True,
+ 'format': 'bestvideo',
},
+ 'add_ie': ['BrightcoveNew'],
}, {
'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
'info_dict': {
@@ -65,7 +60,6 @@ class TeleQuebecIE(TeleQuebecBaseIE):
},
'params': {
'format': 'bestvideo',
- 'skip_download': True,
},
'add_ie': ['BrightcoveNew'],
}, {
@@ -79,25 +73,20 @@ class TeleQuebecIE(TeleQuebecBaseIE):
def _real_extract(self, url):
media_id = self._match_id(url)
-
- media_data = self._download_json(
- 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
+ media = self._download_json(
+ 'https://mnmedias.api.telequebec.tv/api/v3/media/' + media_id,
media_id)['media']
-
- source_id = media_data['streamInfo']['sourceId']
- source = (try_get(
- media_data, lambda x: x['streamInfo']['source'],
- compat_str) or 'limelight').lower()
- if source == 'brightcove':
- info = self._brightcove_result(source_id)
- else:
- info = self._limelight_result(source_id)
+ source_id = next(source_info['sourceId'] for source_info in media['streamInfos'] if source_info.get('source') == 'Brightcove')
+ info = self._brightcove_result(source_id, '22gPKdt7f')
+ product = media.get('product') or {}
+ season = product.get('season') or {}
info.update({
- 'title': media_data.get('title'),
- 'description': try_get(
- media_data, lambda x: x['descriptions'][0]['text'], compat_str),
- 'duration': int_or_none(
- media_data.get('durationInMilliseconds'), 1000),
+ 'description': try_get(media, lambda x: x['descriptions'][-1]['text'], compat_str),
+ 'series': try_get(season, lambda x: x['serie']['titre']),
+ 'season': season.get('name'),
+ 'season_number': int_or_none(season.get('seasonNo')),
+ 'episode': product.get('titre'),
+ 'episode_number': int_or_none(product.get('episodeNo')),
})
return info
@@ -148,7 +137,7 @@ class TeleQuebecSquatIE(InfoExtractor):
}
-class TeleQuebecEmissionIE(TeleQuebecBaseIE):
+class TeleQuebecEmissionIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
@@ -160,15 +149,16 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
_TESTS = [{
'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
'info_dict': {
- 'id': '66648a6aef914fe3badda25e81a4d50a',
+ 'id': '6154476028001',
'ext': 'mp4',
- 'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
- 'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
- 'upload_date': '20171024',
- 'timestamp': 1508862118,
+ 'title': 'Des soins esthétiques à 377 % d’intérêts annuels, ça vous tente?',
+ 'description': 'md5:cb4d378e073fae6cce1f87c00f84ae9f',
+ 'upload_date': '20200505',
+ 'timestamp': 1588713424,
+ 'uploader_id': '6150020952001',
},
'params': {
- 'skip_download': True,
+ 'format': 'bestvideo',
},
}, {
'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
@@ -187,26 +177,26 @@ class TeleQuebecEmissionIE(TeleQuebecBaseIE):
webpage = self._download_webpage(url, display_id)
media_id = self._search_regex(
- r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
- 'limelight id')
+ r'mediaId\s*:\s*(?P<id>\d+)', webpage, 'media id')
- info = self._limelight_result(media_id)
- info.update({
- 'title': self._og_search_title(webpage, default=None),
- 'description': self._og_search_description(webpage, default=None),
- })
- return info
+ return self.url_result(
+ 'http://zonevideo.telequebec.tv/media/' + media_id,
+ TeleQuebecIE.ie_key())
-class TeleQuebecLiveIE(InfoExtractor):
+class TeleQuebecLiveIE(TeleQuebecBaseIE):
_VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
_TEST = {
'url': 'http://zonevideo.telequebec.tv/endirect/',
'info_dict': {
- 'id': 'endirect',
+ 'id': '6159095684001',
'ext': 'mp4',
- 'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'title': 're:^Télé-Québec [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'is_live': True,
+ 'description': 'Canal principal de Télé-Québec',
+ 'uploader_id': '6150020952001',
+ 'timestamp': 1590439901,
+ 'upload_date': '20200525',
},
'params': {
'skip_download': True,
@@ -214,25 +204,49 @@ class TeleQuebecLiveIE(InfoExtractor):
}
def _real_extract(self, url):
- video_id = self._match_id(url)
+ return self._brightcove_result('6159095684001', 'skCsmi2Uw')
- m3u8_url = None
- webpage = self._download_webpage(
- 'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
- fatal=False)
- if webpage:
- m3u8_url = self._search_regex(
- r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'm3u8 url', default=None, group='url')
- if not m3u8_url:
- m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
- return {
- 'id': video_id,
- 'title': self._live_title('Télé-Québec - En direct'),
- 'is_live': True,
- 'formats': formats,
- }
+class TeleQuebecVideoIE(TeleQuebecBaseIE):
+ _VALID_URL = r'https?://video\.telequebec\.tv/player(?:-live)?/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://video.telequebec.tv/player/31110/stream',
+ 'info_dict': {
+ 'id': '6202570652001',
+ 'ext': 'mp4',
+ 'title': 'Le coût du véhicule le plus vendu au Canada / Tous les frais liés à la procréation assistée',
+ 'description': 'md5:685a7e4c450ba777c60adb6e71e41526',
+ 'upload_date': '20201019',
+ 'timestamp': 1603115930,
+ 'uploader_id': '6101674910001',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ }, {
+ 'url': 'https://video.telequebec.tv/player-live/28527',
+ 'only_matching': True,
+ }]
+
+ def _call_api(self, path, video_id):
+ return self._download_json(
+ 'http://beacon.playback.api.brightcove.com/telequebec/api/assets/' + path,
+ video_id, query={'device_layout': 'web', 'device_type': 'web'})['data']
+
+ def _real_extract(self, url):
+ asset_id = self._match_id(url)
+ asset = self._call_api(asset_id, asset_id)['asset']
+ stream = self._call_api(
+ asset_id + '/streams/' + asset['streams'][0]['id'], asset_id)['stream']
+ stream_url = stream['url']
+ account_id = try_get(
+ stream, lambda x: x['video_provider_details']['account_id']) or '6101674910001'
+ info = self._brightcove_result(stream_url, 'default', account_id)
+ info.update({
+ 'description': asset.get('long_description') or asset.get('short_description'),
+ 'series': asset.get('series_original_name'),
+ 'season_number': int_or_none(asset.get('season_number')),
+ 'episode': asset.get('original_name'),
+ 'episode_number': int_or_none(asset.get('episode_number')),
+ })
+ return info
diff --git a/youtube_dlc/extractor/tubitv.py b/youtube_dlc/extractor/tubitv.py
index a51fa6515..ebfb05c63 100644
--- a/youtube_dlc/extractor/tubitv.py
+++ b/youtube_dlc/extractor/tubitv.py
@@ -33,6 +33,19 @@ class TubiTvIE(InfoExtractor):
}, {
'url': 'http://tubitv.com/movies/383676/tracker',
'only_matching': True,
+ }, {
+ 'url': 'https://tubitv.com/movies/560057/penitentiary?start=true',
+ 'info_dict': {
+ 'id': '560057',
+ 'ext': 'mp4',
+ 'title': 'Penitentiary',
+ 'description': 'md5:8d2fc793a93cc1575ff426fdcb8dd3f9',
+ 'uploader_id': 'd8fed30d4f24fcb22ec294421b9defc2',
+ 'release_year': 1979,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
}]
def _login(self):
@@ -93,4 +106,5 @@ class TubiTvIE(InfoExtractor):
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'uploader_id': video_data.get('publisher_id'),
+ 'release_year': int_or_none(video_data.get('year')),
}
diff --git a/youtube_dlc/extractor/tvplay.py b/youtube_dlc/extractor/tvplay.py
index 3c2450dd0..0d858c025 100644
--- a/youtube_dlc/extractor/tvplay.py
+++ b/youtube_dlc/extractor/tvplay.py
@@ -12,11 +12,13 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
+ parse_duration,
parse_iso8601,
qualities,
try_get,
update_url_query,
url_or_none,
+ urljoin,
)
@@ -414,7 +416,7 @@ class ViafreeIE(InfoExtractor):
class TVPlayHomeIE(InfoExtractor):
- _VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
'info_dict': {
@@ -433,80 +435,58 @@ class TVPlayHomeIE(InfoExtractor):
'params': {
'skip_download': True,
},
- 'add_ie': [TVPlayIE.ie_key()],
}, {
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
'only_matching': True,
}, {
'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
'only_matching': True,
+ }, {
+ 'url': 'https://play.tv3.lt/aferistai-10047125',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354',
+ 'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_id = self._search_regex(
- r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
-
- if len(video_id) < 8:
- return self.url_result(
- 'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
+ asset = self._download_json(
+ urljoin(url, '/sb/public/asset/' + video_id), video_id)
- m3u8_url = self._search_regex(
- r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'm3u8 url', group='url')
+ m3u8_url = asset['movie']['contentUrl']
+ video_id = asset['assetId']
+ asset_title = asset['title']
+ title = asset_title['title']
formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
self._sort_formats(formats)
- title = self._search_regex(
- r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'title', default=None, group='value') or self._html_search_meta(
- 'title', webpage, default=None) or self._og_search_title(
- webpage)
-
- description = self._html_search_meta(
- 'description', webpage,
- default=None) or self._og_search_description(webpage)
-
- thumbnail = self._search_regex(
- r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'thumbnail', default=None, group='url') or self._html_search_meta(
- 'thumbnail', webpage, default=None) or self._og_search_thumbnail(
- webpage)
-
- duration = int_or_none(self._search_regex(
- r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
- fatal=False))
+ thumbnails = None
+ image_url = asset.get('imageUrl')
+ if image_url:
+ thumbnails = [{
+ 'url': urljoin(url, image_url),
+ 'ext': 'jpg',
+ }]
- season = self._search_regex(
- (r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
- r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
- 'season', default=None, group='value')
- season_number = int_or_none(self._search_regex(
- r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
- default=None))
- episode = self._search_regex(
- (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
- r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
- 'episode', default=None, group='value')
- episode_number = int_or_none(self._search_regex(
- r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
- default=None))
+ metadata = asset.get('metadata') or {}
return {
'id': video_id,
'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'season': season,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
+ 'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'),
+ 'thumbnails': thumbnails,
+ 'duration': parse_duration(asset_title.get('runTime')),
+ 'series': asset.get('tvSeriesTitle'),
+ 'season': asset.get('tvSeasonTitle'),
+ 'season_number': int_or_none(metadata.get('seasonNumber')),
+ 'episode': asset_title.get('titleBrief'),
+ 'episode_number': int_or_none(metadata.get('episodeNumber')),
'formats': formats,
}
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index b08ac6655..aa57fabb1 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -74,11 +74,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
- _YOUTUBE_CLIENT_HEADERS = {
- 'x-youtube-client-name': '1',
- 'x-youtube-client-version': '1.20200609.04.02',
- }
-
def _set_language(self):
self._set_cookie(
'.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
@@ -2796,6 +2791,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
# no longer available?
'url': 'https://www.youtube.com/feed/recommended',
'only_matching': True,
+ }, {
+ # inline playlist with not always working continuations
+ 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
+ 'only_matching': True,
}
# TODO
# {
@@ -2996,6 +2995,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
for entry in self._post_thread_entries(renderer):
yield entry
+ @staticmethod
+ def _build_continuation_query(continuation, ctp=None):
+ query = {
+ 'ctoken': continuation,
+ 'continuation': continuation,
+ }
+ if ctp:
+ query['itct'] = ctp
+ return query
+
@staticmethod
def _extract_next_continuation_data(renderer):
next_continuation = try_get(
@@ -3006,11 +3015,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if not continuation:
return
ctp = next_continuation.get('clickTrackingParams')
- return {
- 'ctoken': continuation,
- 'continuation': continuation,
- 'itct': ctp,
- }
+ return YoutubeTabIE._build_continuation_query(continuation, ctp)
@classmethod
def _extract_continuation(cls, renderer):
@@ -3033,13 +3038,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if not continuation:
continue
ctp = continuation_ep.get('clickTrackingParams')
- if not ctp:
- continue
- return {
- 'ctoken': continuation,
- 'continuation': continuation,
- 'itct': ctp,
- }
+ return YoutubeTabIE._build_continuation_query(continuation, ctp)
def _entries(self, tab, identity_token):
@@ -3232,16 +3231,29 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
playlist.update(self._extract_uploader(data))
return playlist
- def _extract_from_playlist(self, item_id, data, playlist):
+ def _extract_from_playlist(self, item_id, url, data, playlist):
title = playlist.get('title') or try_get(
data, lambda x: x['titleText']['simpleText'], compat_str)
playlist_id = playlist.get('playlistId') or item_id
+ # Continuation of the inline playlist rendition does not always work
+ # on YouTube's side, so delegate to the regular tab-based playlist URL
+ # processing whenever possible.
+ playlist_url = urljoin(url, try_get(
+ playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
+ compat_str))
+ if playlist_url and playlist_url != url:
+ return self.url_result(
+ playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
+ video_title=title)
return self.playlist_result(
self._playlist_entries(playlist), playlist_id=playlist_id,
playlist_title=title)
- def _extract_alerts(self, data):
+ @staticmethod
+ def _extract_alerts(data):
for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
+ if not isinstance(alert_dict, dict):
+ continue
for renderer in alert_dict:
alert = alert_dict[renderer]
alert_type = alert.get('type')
@@ -3255,6 +3267,19 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
if message:
yield alert_type, message
+ def _extract_identity_token(self, webpage, item_id):
+ ytcfg = self._parse_json(
+ self._search_regex(
+ r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
+ default='{}'), item_id, fatal=False)
+ if ytcfg:
+ token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
+ if token:
+ return token
+ return self._search_regex(
+ r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'identity token', default=None)
+
def _real_extract(self, url):
item_id = self._match_id(url)
url = compat_urlparse.urlunparse(
@@ -3285,9 +3310,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, item_id)
- identity_token = self._search_regex(
- r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
- 'identity token', default=None)
+ identity_token = self._extract_identity_token(webpage, item_id)
data = self._extract_yt_initial_data(item_id, webpage)
for alert_type, alert_message in self._extract_alerts(data):
self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
@@ -3298,7 +3321,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
playlist = try_get(
data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
if playlist:
- return self._extract_from_playlist(item_id, data, playlist)
+ return self._extract_from_playlist(item_id, url, data, playlist)
# Fallback to video extraction if no playlist alike page is recognized.
# First check for the current video then try the v attribute of URL query.
video_id = try_get(