Update to 2020.12.26

commit 8bff4f84b5 (parent 3221ffac20, pull/280/head)
pukkandan, 5 years ago

@@ -112,6 +112,7 @@
 - **blinkx**
 - **Bloomberg**
 - **BokeCC**
+- **BongaCams**
 - **BostonGlobe**
 - **Box**
 - **Bpb**: Bundeszentrale für politische Bildung
@@ -146,6 +147,7 @@
 - **CBS**
 - **CBSInteractive**
 - **CBSLocal**
+- **CBSLocalArticle**
 - **cbsnews**: CBS News
 - **cbsnews:embed**
 - **cbsnews:livevideo**: CBS News Live Videos
@@ -198,6 +200,7 @@
 - **CSNNE**
 - **CSpan**: C-SPAN
 - **CtsNews**: 華視新聞
+- **CTV**
 - **CTVNews**
 - **cu.ntv.co.jp**: Nippon Television Network
 - **Culturebox**
@@ -1119,6 +1122,7 @@
 - **WeiboMobile**
 - **WeiqiTV**: WQTV
 - **Wistia**
+- **WistiaPlaylist**
 - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **WorldStarHipHop**
 - **WSJ**: Wall Street Journal

@@ -39,7 +39,7 @@ class TestAllURLsMatching(unittest.TestCase):
         assertTab('https://www.youtube.com/embedded')
         assertTab('https://www.youtube.com/feed')  # Own channel's home page
         assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
-        assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
         assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')  # 668
         self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
@@ -60,8 +60,8 @@ class TestAllURLsMatching(unittest.TestCase):
         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
         assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')

-    # def test_youtube_user_matching(self):
-    #     self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
+    def test_youtube_user_matching(self):
+        self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])

     def test_youtube_feeds(self):
         self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])

@@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
     _LOGIN_URL = 'https://account.bbc.com/signin'
     _NETRC_MACHINE = 'bbc'

-    _MEDIASELECTOR_URLS = [
+    _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
+    _MEDIA_SETS = [
         # Provides HQ HLS streams with even better quality that pc mediaset but fails
         # with geolocation in some cases when it's even not geo restricted at all (e.g.
         # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
+        'iptv-all',
+        'pc',
     ]
-    _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
     _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
-    _NAMESPACES = (
-        _MEDIASELECTION_NS,
-        _EMP_PLAYLIST_NS,
-    )

     _TESTS = [
         {
             'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
@@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
         'only_matching': True,
     }]

-    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
-
     def _login(self):
         username, password = self._get_login_info()
         if username is None:
@@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
     def _extract_items(self, playlist):
         return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)

-    def _findall_ns(self, element, xpath):
-        elements = []
-        for ns in self._NAMESPACES:
-            elements.extend(element.findall(xpath % ns))
-        return elements
-
     def _extract_medias(self, media_selection):
-        error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
-        if error is None:
-            media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
-        if error is not None:
-            raise BBCCoUkIE.MediaSelectionError(error.get('id'))
-        return self._findall_ns(media_selection, './{%s}media')
+        error = media_selection.get('result')
+        if error:
+            raise BBCCoUkIE.MediaSelectionError(error)
+        return media_selection.get('media') or []

     def _extract_connections(self, media):
-        return self._findall_ns(media, './{%s}connection')
+        return media.get('connection') or []

     def _get_subtitles(self, media, programme_id):
         subtitles = {}
@@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
                 cc_url, programme_id, 'Downloading captions', fatal=False)
             if not isinstance(captions, compat_etree_Element):
                 continue
-            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
-            subtitles[lang] = [
+            subtitles['en'] = [
                 {
                     'url': connection.get('href'),
                     'ext': 'ttml',
                 },
             ]
+            break
         return subtitles

     def _raise_extractor_error(self, media_selection_error):
@@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):
     def _download_media_selector(self, programme_id):
         last_exception = None
-        for mediaselector_url in self._MEDIASELECTOR_URLS:
+        for media_set in self._MEDIA_SETS:
             try:
                 return self._download_media_selector_url(
-                    mediaselector_url % programme_id, programme_id)
+                    self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
             except BBCCoUkIE.MediaSelectionError as e:
                 if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
                     last_exception = e
@@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
         self._raise_extractor_error(last_exception)

     def _download_media_selector_url(self, url, programme_id=None):
-        media_selection = self._download_xml(
-            url, programme_id, 'Downloading media selection XML',
+        media_selection = self._download_json(
+            url, programme_id, 'Downloading media selection JSON',
             expected_status=(403, 404))
         return self._process_media_selector(media_selection, programme_id)

@@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
             if kind in ('video', 'audio'):
                 bitrate = int_or_none(media.get('bitrate'))
                 encoding = media.get('encoding')
-                service = media.get('service')
                 width = int_or_none(media.get('width'))
                 height = int_or_none(media.get('height'))
                 file_size = int_or_none(media.get('media_file_size'))
@@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
                     supplier = connection.get('supplier')
                     transfer_format = connection.get('transferFormat')
                     format_id = supplier or conn_kind or protocol
-                    if service:
-                        format_id = '%s_%s' % (service, format_id)
                     # ASX playlist
                     if supplier == 'asx':
                         for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
@@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
                         formats.extend(self._extract_m3u8_formats(
                             href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                             m3u8_id=format_id, fatal=False))
-                        if re.search(self._USP_RE, href):
-                            usp_formats = self._extract_m3u8_formats(
-                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
-                                programme_id, ext='mp4', entry_protocol='m3u8_native',
-                                m3u8_id=format_id, fatal=False)
-                            for f in usp_formats:
-                                if f.get('height') and f['height'] > 720:
-                                    continue
-                                formats.append(f)
                     elif transfer_format == 'hds':
                         formats.extend(self._extract_f4m_formats(
                             href, programme_id, f4m_id=format_id, fatal=False))
                     else:
-                        if not service and not supplier and bitrate:
+                        if not supplier and bitrate:
                             format_id += '-%d' % bitrate
                         fmt = {
                             'format_id': format_id,
@@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
         webpage = self._download_webpage(url, group_id, 'Downloading video page')

         error = self._search_regex(
-            r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+            r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
             webpage, 'error', default=None)
         if error:
             raise ExtractorError(error, expected=True)
@@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
     IE_DESC = 'BBC'
     _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'

-    _MEDIASELECTOR_URLS = [
-        # Provides HQ HLS streams but fails with geolocation in some cases when it's
-        # even not geo restricted at all
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
-        # Provides more formats, namely direct mp4 links, but fails on some videos with
-        # notukerror for non UK (?) users (e.g.
-        # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
-        'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
-        # Provides fewer formats, but works everywhere for everybody (hopefully)
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
+    _MEDIA_SETS = [
+        'mobile-tablet-main',
+        'pc',
     ]

     _TESTS = [{
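Note: with mediaselector v5 (XML) replaced by the v6 JSON endpoint, _extract_medias() and _extract_connections() reduce to plain dict lookups. A minimal standalone sketch of the new flow, reading only the fields the updated code touches (result, media, connection); the sample payload and ids below are illustrative, not a captured response:

MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'

def extract_medias(media_selection):
    # v6 reports failures as an error id string under 'result',
    # e.g. 'geolocation' or 'selectionunavailable'
    error = media_selection.get('result')
    if error:
        raise Exception('mediaselector error: %s' % error)
    return media_selection.get('media') or []

sample = {'media': [{'kind': 'video', 'bitrate': 5070, 'connection': [
    {'transferFormat': 'hls', 'href': 'https://example.com/master.m3u8'}]}]}
for media in extract_medias(sample):
    for connection in media.get('connection') or []:
        print(connection.get('transferFormat'), connection.get('href'))
print(MEDIA_SELECTOR_URL_TEMPL % ('iptv-all', 'p01c6k2f'))  # placeholder vpid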

@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    try_get,
+    urlencode_postdata,
+)
+
+
+class BongaCamsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://de.bongacams.com/azumi-8',
+        'only_matching': True,
+    }, {
+        'url': 'https://cn.bongacams.com/azumi-8',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        channel_id = mobj.group('id')
+
+        amf = self._download_json(
+            'https://%s/tools/amf.php' % host, channel_id,
+            data=urlencode_postdata((
+                ('method', 'getRoomData'),
+                ('args[]', channel_id),
+                ('args[]', 'false'),
+            )), headers={'X-Requested-With': 'XMLHttpRequest'})
+
+        server_url = amf['localData']['videoServerUrl']
+
+        uploader_id = try_get(
+            amf, lambda x: x['performerData']['username'], compat_str) or channel_id
+        uploader = try_get(
+            amf, lambda x: x['performerData']['displayName'], compat_str)
+        like_count = int_or_none(try_get(
+            amf, lambda x: x['performerData']['loversCount']))
+
+        formats = self._extract_m3u8_formats(
+            '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
+            channel_id, 'mp4', m3u8_id='hls', live=True)
+        self._sort_formats(formats)
+
+        return {
+            'id': channel_id,
+            'title': self._live_title(uploader or uploader_id),
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'like_count': like_count,
+            'age_limit': 18,
+            'is_live': True,
+            'formats': formats,
+        }
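Note: the getRoomData AMF call the new extractor relies on can be exercised standalone. A Python 3 sketch; the host and channel are placeholders taken from _TESTS above, and the response layout ('localData', 'performerData') is assumed to be exactly what the extractor reads:

import json
from urllib.parse import urlencode
from urllib.request import Request, urlopen

host, channel_id = 'de.bongacams.com', 'azumi-8'  # placeholders from _TESTS
req = Request(
    'https://%s/tools/amf.php' % host,
    data=urlencode((
        ('method', 'getRoomData'),
        ('args[]', channel_id),
        ('args[]', 'false'),
    )).encode(),
    headers={'X-Requested-With': 'XMLHttpRequest'})
amf = json.load(urlopen(req))
# The HLS playlist is then built as
# '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id)
print(amf['localData']['videoServerUrl'])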

@@ -127,6 +127,7 @@ from .bleacherreport import (
 from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
+from .bongacams import BongaCamsIE
 from .bostonglobe import BostonGlobeIE
 from .box import BoxIE
 from .bpb import BpbIE
@@ -1492,7 +1493,10 @@ from .weibo import (
     WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
-from .wistia import WistiaIE
+from .wistia import (
+    WistiaIE,
+    WistiaPlaylistIE,
+)
 from .worldstarhiphop import WorldStarHipHopIE
 from .wsj import (
     WSJIE,

@@ -2022,22 +2022,6 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [SpringboardPlatformIE.ie_key()],
         },
-        {
-            'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
-            'info_dict': {
-                'id': 'uPDB5I9wfp8',
-                'ext': 'webm',
-                'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
-                'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
-                'upload_date': '20160219',
-                'uploader': 'Pocoyo - Português (BR)',
-                'uploader_id': 'PocoyoBrazil',
-            },
-            'add_ie': [YoutubeIE.ie_key()],
-            'params': {
-                'skip_download': True,
-            },
-        },
         {
             'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
             'info_dict': {

@@ -22,7 +22,7 @@ from ..utils import (
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
@@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor):
             'timestamp': 1371748545,
             'upload_date': '20130620',
             'uploader_id': 'naomipq',
-            'uploader': 'Naomi Leonor Phan-Quang',
+            'uploader': 'B E A U T Y F O R A S H E S',
             'like_count': int,
             'comment_count': int,
             'comments': list,
@@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor):
     }, {
         'url': 'https://www.instagram.com/tv/aye83DjauH/',
         'only_matching': True,
+    }, {
+        'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
+        'only_matching': True,
     }]

     @staticmethod
@@ -122,81 +125,92 @@ class InstagramIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)

-        (video_url, description, thumbnail, timestamp, uploader,
+        (media, video_url, description, thumbnail, timestamp, uploader,
          uploader_id, like_count, comment_count, comments, height,
-         width) = [None] * 11
+         width) = [None] * 12

-        shared_data = try_get(webpage,
-                              (lambda x: self._parse_json(
-                                  self._search_regex(
-                                      r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);',
-                                      x, 'additional data', default='{}'),
-                                  video_id, fatal=False),
-                               lambda x: self._parse_json(
-                                   self._search_regex(
-                                       r'window\._sharedData\s*=\s*({.+?});',
-                                       x, 'shared data', default='{}'),
-                                   video_id, fatal=False)['entry_data']['PostPage'][0]),
-                              None)
+        shared_data = self._parse_json(
+            self._search_regex(
+                r'window\._sharedData\s*=\s*({.+?});',
+                webpage, 'shared data', default='{}'),
+            video_id, fatal=False)
         if shared_data:
             media = try_get(
                 shared_data,
-                (lambda x: x['graphql']['shortcode_media'],
-                 lambda x: x['media']),
+                (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
+                 lambda x: x['entry_data']['PostPage'][0]['media']),
                 dict)
-            if media:
-                video_url = media.get('video_url')
-                height = int_or_none(media.get('dimensions', {}).get('height'))
-                width = int_or_none(media.get('dimensions', {}).get('width'))
-                description = try_get(
-                    media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
-                    compat_str) or media.get('caption')
-                thumbnail = media.get('display_src') or media.get('thumbnail_src')
-                timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
-                uploader = media.get('owner', {}).get('full_name')
-                uploader_id = media.get('owner', {}).get('username')
-
-                def get_count(key, kind):
-                    return int_or_none(try_get(
-                        media, (lambda x: x['edge_media_%s' % key]['count'],
-                                lambda x: x['%ss' % kind]['count'])))
-                like_count = get_count('preview_like', 'like')
-                comment_count = get_count('to_comment', 'comment')
-
-                comments = [{
-                    'author': comment.get('user', {}).get('username'),
-                    'author_id': comment.get('user', {}).get('id'),
-                    'id': comment.get('id'),
-                    'text': comment.get('text'),
-                    'timestamp': int_or_none(comment.get('created_at')),
-                } for comment in media.get(
-                    'comments', {}).get('nodes', []) if comment.get('text')]
-                if not video_url:
-                    edges = try_get(
-                        media, lambda x: x['edge_sidecar_to_children']['edges'],
-                        list) or []
-                    if edges:
-                        entries = []
-                        for edge_num, edge in enumerate(edges, start=1):
-                            node = try_get(edge, lambda x: x['node'], dict)
-                            if not node:
-                                continue
-                            node_video_url = url_or_none(node.get('video_url'))
-                            if not node_video_url:
-                                continue
-                            entries.append({
-                                'id': node.get('shortcode') or node['id'],
-                                'title': 'Video %d' % edge_num,
-                                'url': node_video_url,
-                                'thumbnail': node.get('display_url'),
-                                'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
-                                'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
-                                'view_count': int_or_none(node.get('video_view_count')),
-                            })
-                        return self.playlist_result(
-                            entries, video_id,
-                            'Post by %s' % uploader_id if uploader_id else None,
-                            description)
+
+        # _sharedData.entry_data.PostPage is empty when authenticated (see
+        # https://github.com/ytdl-org/youtube-dl/pull/22880)
+        if not media:
+            additional_data = self._parse_json(
+                self._search_regex(
+                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
+                    webpage, 'additional data', default='{}'),
+                video_id, fatal=False)
+            if additional_data:
+                media = try_get(
+                    additional_data, lambda x: x['graphql']['shortcode_media'],
+                    dict)
+        if media:
+            video_url = media.get('video_url')
+            height = int_or_none(media.get('dimensions', {}).get('height'))
+            width = int_or_none(media.get('dimensions', {}).get('width'))
+            description = try_get(
+                media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
+                compat_str) or media.get('caption')
+            thumbnail = media.get('display_src') or media.get('display_url')
+            timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
+            uploader = media.get('owner', {}).get('full_name')
+            uploader_id = media.get('owner', {}).get('username')
+
+            def get_count(keys, kind):
+                if not isinstance(keys, (list, tuple)):
+                    keys = [keys]
+                for key in keys:
+                    count = int_or_none(try_get(
+                        media, (lambda x: x['edge_media_%s' % key]['count'],
+                                lambda x: x['%ss' % kind]['count'])))
+                    if count is not None:
+                        return count
+            like_count = get_count('preview_like', 'like')
+            comment_count = get_count(
+                ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
+
+            comments = [{
+                'author': comment.get('user', {}).get('username'),
+                'author_id': comment.get('user', {}).get('id'),
+                'id': comment.get('id'),
+                'text': comment.get('text'),
+                'timestamp': int_or_none(comment.get('created_at')),
+            } for comment in media.get(
+                'comments', {}).get('nodes', []) if comment.get('text')]
+            if not video_url:
+                edges = try_get(
+                    media, lambda x: x['edge_sidecar_to_children']['edges'],
+                    list) or []
+                if edges:
+                    entries = []
+                    for edge_num, edge in enumerate(edges, start=1):
+                        node = try_get(edge, lambda x: x['node'], dict)
+                        if not node:
+                            continue
+                        node_video_url = url_or_none(node.get('video_url'))
+                        if not node_video_url:
+                            continue
+                        entries.append({
+                            'id': node.get('shortcode') or node['id'],
+                            'title': 'Video %d' % edge_num,
+                            'url': node_video_url,
+                            'thumbnail': node.get('display_url'),
+                            'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
+                            'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
+                            'view_count': int_or_none(node.get('video_view_count')),
+                        })
+                    return self.playlist_result(
+                        entries, video_id,
+                        'Post by %s' % uploader_id if uploader_id else None,
+                        description)

         if not video_url:
             video_url = self._og_search_video_url(webpage, secure=False)
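Note: the lookup order is now inverted: window._sharedData is tried first and window.__additionalDataLoaded only as a fallback, since PostPage is empty for authenticated sessions. A minimal sketch of that order, using the same regexes as the diff; json.loads stands in for _parse_json:

import json
import re

def find_media(webpage):
    m = re.search(r'window\._sharedData\s*=\s*({.+?});', webpage)
    shared_data = json.loads(m.group(1)) if m else {}
    media = None
    try:
        post_page = shared_data['entry_data']['PostPage'][0]
        media = post_page['graphql']['shortcode_media']
    except (KeyError, IndexError):
        pass
    if not media:  # PostPage is empty when authenticated (PR 22880)
        m = re.search(
            r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
            webpage)
        if m:
            media = json.loads(m.group(1)).get('graphql', {}).get('shortcode_media')
    return media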

@@ -288,14 +288,24 @@ class PornHubIE(PornHubBaseIE):
             video_urls.append((v_url, None))
             video_urls_set.add(v_url)

+        def parse_quality_items(quality_items):
+            q_items = self._parse_json(quality_items, video_id, fatal=False)
+            if not isinstance(q_items, list):
+                return
+            for item in q_items:
+                if isinstance(item, dict):
+                    add_video_url(item.get('url'))
+
         if not video_urls:
-            FORMAT_PREFIXES = ('media', 'quality')
+            FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
             js_vars = extract_js_vars(
                 webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
                 default=None)
             if js_vars:
                 for key, format_url in js_vars.items():
-                    if any(key.startswith(p) for p in FORMAT_PREFIXES):
+                    if key.startswith(FORMAT_PREFIXES[-1]):
+                        parse_quality_items(format_url)
+                    elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
                         add_video_url(format_url)
         if not video_urls and re.search(
                 r'<[^>]+\bid=["\']lockedPlayer', webpage):
@@ -351,12 +361,16 @@ class PornHubIE(PornHubBaseIE):
             r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
             webpage, 'uploader', default=None)

+        def extract_vote_count(kind, name):
+            return self._extract_count(
+                (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
+                 r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
+                webpage, name)
+
         view_count = self._extract_count(
             r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
-        like_count = self._extract_count(
-            r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
-        dislike_count = self._extract_count(
-            r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
+        like_count = extract_vote_count('Up', 'like')
+        dislike_count = extract_vote_count('Down', 'dislike')
         comment_count = self._extract_count(
             r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
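Note: the new qualityItems_* JS variables carry a JSON array of objects rather than a bare URL string, and only the 'url' field is consumed. A simplified sketch of parse_quality_items() with json.loads standing in for _parse_json; the sample value is illustrative:

import json

def parse_quality_items(quality_items, add_video_url):
    q_items = json.loads(quality_items)
    if not isinstance(q_items, list):
        return
    for item in q_items:
        if isinstance(item, dict):
            add_video_url(item.get('url'))

urls = []
parse_quality_items('[{"url": "https://example.com/720.mp4"}]', urls.append)
print(urls)  # ['https://example.com/720.mp4']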

@@ -7,17 +7,24 @@ from ..utils import (
     determine_ext,
     ExtractorError,
     merge_dicts,
-    orderedSet,
     parse_duration,
     parse_resolution,
     str_to_int,
     url_or_none,
     urlencode_postdata,
+    urljoin,
 )


 class SpankBangIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:[^/]+\.)?spankbang\.com/
+                        (?:
+                            (?P<id>[\da-z]+)/(?:video|play|embed)\b|
+                            [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
+                        )
+                    '''
     _TESTS = [{
         'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
         'md5': '1cc433e1d6aa14bc376535b8679302f7',
@@ -57,10 +64,14 @@ class SpankBangIE(InfoExtractor):
     }, {
         'url': 'https://spankbang.com/2y3td/embed/',
         'only_matching': True,
+    }, {
+        'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('id_2')
         webpage = self._download_webpage(
             url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
             video_id, headers={'Cookie': 'country=US'})
@@ -155,30 +166,33 @@ class SpankBangPlaylistIE(InfoExtractor):
 class SpankBangPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
+    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
     _TEST = {
         'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
         'info_dict': {
             'id': 'ug0k',
             'title': 'Big Ass Titties',
         },
-        'playlist_mincount': 50,
+        'playlist_mincount': 40,
     }

     def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        display_id = mobj.group('display_id')

         webpage = self._download_webpage(
             url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})

         entries = [self.url_result(
-            'https://spankbang.com/%s/video' % video_id,
-            ie=SpankBangIE.ie_key(), video_id=video_id)
-            for video_id in orderedSet(re.findall(
-                r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
+            urljoin(url, mobj.group('path')),
+            ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
+            for mobj in re.finditer(
+                r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
+                % re.escape(display_id), webpage)]

         title = self._html_search_regex(
-            r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
+            r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title',
            fatal=False)

         return self.playlist_result(entries, playlist_id, title)
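Note: a quick check that the widened SpankBangIE._VALID_URL routes both URL shapes; /<id>/video/ fills the id group and <slug>-<id>/playlist/ fills id_2, which _real_extract then coalesces. Sketch using the two test URLs:

import re

VALID_URL = r'''(?x)
                https?://
                    (?:[^/]+\.)?spankbang\.com/
                    (?:
                        (?P<id>[\da-z]+)/(?:video|play|embed)\b|
                        [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
                    )
                '''

for url in ('http://spankbang.com/3vvn/video/fantasy+solo',
            'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty'):
    mobj = re.match(VALID_URL, url)
    print(mobj.group('id') or mobj.group('id_2'))  # 3vvn, then 7ecbgu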

@@ -3,50 +3,62 @@ from __future__ import unicode_literals
 from .adobepass import AdobePassIE
 from ..utils import (
-    extract_attributes,
-    update_url_query,
+    int_or_none,
     smuggle_url,
+    update_url_query,
 )


 class SproutIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
-    _TEST = {
-        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
-        'md5': '74bf14128578d1e040c3ebc82088f45f',
-        'info_dict': {
-            'id': '9dexnwtmh8_X',
-            'ext': 'mp4',
-            'title': 'A Cowboy Adventure',
-            'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
-            'timestamp': 1437758640,
-            'upload_date': '20150724',
-            'uploader': 'NBCU-SPROUT-NEW',
-        }
-    }
+    _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
+        'info_dict': {
+            'id': 'bm0foJFaTKqb',
+            'ext': 'mp4',
+            'title': 'Robot Bike Race',
+            'description': 'md5:436b1d97117cc437f54c383f4debc66d',
+            'timestamp': 1606148940,
+            'upload_date': '20201123',
+            'uploader': 'NBCU-MPAT',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.universalkids.com/watch/robot-bike-race',
+        'only_matching': True,
+    }]
+    _GEO_COUNTRIES = ['US']

     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        video_component = self._search_regex(
-            r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
-            webpage, 'video component', default=None)
-        if video_component:
-            options = self._parse_json(extract_attributes(
-                video_component)['data-options'], video_id)
-            theplatform_url = options['video']
-            query = {
-                'mbr': 'true',
-                'manifest': 'm3u',
-            }
-            if options.get('protected'):
-                query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
-            theplatform_url = smuggle_url(update_url_query(
-                theplatform_url, query), {'force_smil_url': True})
-        else:
-            iframe = self._search_regex(
-                r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
-                webpage, 'iframe')
-            theplatform_url = extract_attributes(iframe)['src']
-
-        return self.url_result(theplatform_url, 'ThePlatform')
+        display_id = self._match_id(url)
+        mpx_metadata = self._download_json(
+            # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
+            'https://www.universalkids.com/_api/videos/' + display_id,
+            display_id)['mpxMetadata']
+        media_pid = mpx_metadata['mediaPid']
+        theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
+        query = {
+            'mbr': 'true',
+            'manifest': 'm3u',
+        }
+        if mpx_metadata.get('entitlement') == 'auth':
+            query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
+        theplatform_url = smuggle_url(
+            update_url_query(theplatform_url, query), {
+                'force_smil_url': True,
+                'geo_countries': self._GEO_COUNTRIES,
+            })
+        return {
+            '_type': 'url_transparent',
+            'id': media_pid,
+            'url': theplatform_url,
+            'series': mpx_metadata.get('seriesName'),
+            'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
+            'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
+            'ie_key': 'ThePlatform',
+        }

@@ -234,6 +234,9 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass({
+            'countries': smuggled_data.get('geo_countries'),
+        })

         mobj = re.match(self._VALID_URL, url)
         provider_id = mobj.group('provider_id')
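Note: this hook pairs with the Sprout change above; Sprout smuggles geo_countries into the ThePlatform URL and ThePlatformIE unsmuggles it to seed the geo bypass. A round-trip with the same youtube_dl helpers (the media pid is taken from the Sprout test):

from youtube_dl.utils import smuggle_url, unsmuggle_url

url = smuggle_url(
    'https://link.theplatform.com/s/HNK2IC/bm0foJFaTKqb',
    {'force_smil_url': True, 'geo_countries': ['US']})
url, smuggled_data = unsmuggle_url(url, {})
print(smuggled_data.get('geo_countries'))  # ['US']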

@@ -1,18 +1,22 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import json
+import re
+
 from .theplatform import ThePlatformIE
 from ..utils import (
     determine_ext,
     parse_duration,
+    parse_iso8601,
 )


 class TheWeatherChannelIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
     _TESTS = [{
         'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
-        'md5': 'ab924ac9574e79689c24c6b95e957def',
+        'md5': 'c4cbe74c9c17c5676b704b950b73dd92',
         'info_dict': {
             'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
             'ext': 'mp4',
@@ -20,18 +24,33 @@ class TheWeatherChannelIE(ThePlatformIE):
             'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
             'uploader': 'TWC - Digital (No Distro)',
             'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
+            'upload_date': '20160720',
+            'timestamp': 1469018835,
         }
+    }, {
+        'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        drupal_settings = self._parse_json(self._search_regex(
-            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-            webpage, 'drupal settings'), display_id)
-        video_id = drupal_settings['twc']['contexts']['node']['uuid']
-        video_data = self._download_json(
-            'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
+        asset_name, locale, display_id = re.match(self._VALID_URL, url).groups()
+        if not locale:
+            locale = 'en-US'
+        video_data = list(self._download_json(
+            'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{
+                'name': 'getCMSAssetsUrlConfig',
+                'params': {
+                    'language': locale.replace('-', '_'),
+                    'query': {
+                        'assetName': {
+                            '$in': asset_name,
+                        },
+                    },
+                }
+            }]).encode(), headers={
+                'Content-Type': 'application/json',
+            })['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
+        video_id = video_data['id']
         seo_meta = video_data.get('seometa', {})
         title = video_data.get('title') or seo_meta['title']
@@ -66,6 +85,8 @@ class TheWeatherChannelIE(ThePlatformIE):
             })
         self._sort_formats(formats)

+        cc_url = video_data.get('cc_url')
+
         return {
             'id': video_id,
             'display_id': display_id,
@@ -74,6 +95,8 @@ class TheWeatherChannelIE(ThePlatformIE):
             'duration': parse_duration(video_data.get('duration')),
             'uploader': video_data.get('providername'),
             'uploader_id': video_data.get('providerid'),
+            'timestamp': parse_iso8601(video_data.get('publishdate')),
+            'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,
             'thumbnails': thumbnails,
             'formats': formats,
         }
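Note: one redux-dal POST replaces the old Drupal-settings scrape. A standalone Python 3 sketch of the request the new code builds; asset_name comes from the first test URL, and the response is unwrapped exactly as in the diff:

import json
from urllib.request import Request, urlopen

asset_name = '/series/great-outdoors/video/ice-climber-is-in-for-a-shock'
body = json.dumps([{
    'name': 'getCMSAssetsUrlConfig',
    'params': {
        'language': 'en_US',
        'query': {'assetName': {'$in': asset_name}},
    },
}]).encode()
req = Request('https://weather.com/api/v1/p/redux-dal', data=body,
              headers={'Content-Type': 'application/json'})
dal = json.load(urlopen(req))['dal']['getCMSAssetsUrlConfig']
video_data = list(dal.values())[0]['data'][0]
print(video_data['id'])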

@@ -5,79 +5,34 @@ import re
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
-    int_or_none,
     float_or_none,
+    int_or_none,
+    try_get,
     unescapeHTML,
 )


-class WistiaIE(InfoExtractor):
-    _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
+class WistiaBaseIE(InfoExtractor):
+    _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
+    _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
     _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'

-    _TESTS = [{
-        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
-        'md5': 'cafeb56ec0c53c18c97405eecb3133df',
-        'info_dict': {
-            'id': 'sh7fpupwlt',
-            'ext': 'mov',
-            'title': 'Being Resourceful',
-            'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
-            'upload_date': '20131204',
-            'timestamp': 1386185018,
-            'duration': 117,
-        },
-    }, {
-        'url': 'wistia:sh7fpupwlt',
-        'only_matching': True,
-    }, {
-        # with hls video
-        'url': 'wistia:807fafadvk',
-        'only_matching': True,
-    }, {
-        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
-        'only_matching': True,
-    }, {
-        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
-        'only_matching': True,
-    }]
-
-    # https://wistia.com/support/embed-and-share/video-on-your-website
-    @staticmethod
-    def _extract_url(webpage):
-        urls = WistiaIE._extract_urls(webpage)
-        return urls[0] if urls else None
-
-    @staticmethod
-    def _extract_urls(webpage):
-        urls = []
-        for match in re.finditer(
-                r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
-            urls.append(unescapeHTML(match.group('url')))
-        for match in re.finditer(
-                r'''(?sx)
-                    <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
-                ''', webpage):
-            urls.append('wistia:%s' % match.group('id'))
-        for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
-            urls.append('wistia:%s' % match.group('id'))
-        return urls
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        data_json = self._download_json(
-            self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
-            # Some videos require this.
-            headers={
-                'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
-            })
-
-        if data_json.get('error'):
-            raise ExtractorError(
-                'Error while getting the playlist', expected=True)
-
-        data = data_json['media']
+    def _download_embed_config(self, config_type, config_id, referer):
+        base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
+        embed_config = self._download_json(
+            base_url + '.json', config_id, headers={
+                'Referer': referer if referer.startswith('http') else base_url,  # Some videos require this.
+            })
+
+        if isinstance(embed_config, dict) and embed_config.get('error'):
+            raise ExtractorError(
+                'Error while getting the playlist', expected=True)
+
+        return embed_config
+
+    def _extract_media(self, embed_config):
+        data = embed_config['media']
+        video_id = data['hashedId']
         title = data['name']

         formats = []
@@ -160,3 +115,85 @@ class WistiaIE(InfoExtractor):
             'timestamp': int_or_none(data.get('createdAt')),
             'subtitles': subtitles,
         }
+
+
+class WistiaIE(WistiaBaseIE):
+    _VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
+
+    _TESTS = [{
+        # with hls video
+        'url': 'wistia:807fafadvk',
+        'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
+        'info_dict': {
+            'id': '807fafadvk',
+            'ext': 'mp4',
+            'title': 'Drip Brennan Dunn Workshop',
+            'description': 'a JV Webinars video',
+            'upload_date': '20160518',
+            'timestamp': 1463607249,
+            'duration': 4987.11,
+        },
+    }, {
+        'url': 'wistia:sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
+        'only_matching': True,
+    }]
+
+    # https://wistia.com/support/embed-and-share/video-on-your-website
+    @staticmethod
+    def _extract_url(webpage):
+        urls = WistiaIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
+    @staticmethod
+    def _extract_urls(webpage):
+        urls = []
+        for match in re.finditer(
+                r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
+            urls.append(unescapeHTML(match.group('url')))
+        for match in re.finditer(
+                r'''(?sx)
+                    <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
+                ''', webpage):
+            urls.append('wistia:%s' % match.group('id'))
+        for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
+            urls.append('wistia:%s' % match.group('id'))
+        return urls
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        embed_config = self._download_embed_config('media', video_id, url)
+        return self._extract_media(embed_config)
+
+
+class WistiaPlaylistIE(WistiaBaseIE):
+    _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX)
+
+    _TEST = {
+        'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
+        'info_dict': {
+            'id': 'aodt9etokc',
+        },
+        'playlist_count': 3,
+    }
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        playlist = self._download_embed_config('playlist', playlist_id, url)
+
+        entries = []
+        for media in (try_get(playlist, lambda x: x[0]['medias']) or []):
+            embed_config = media.get('embed_config')
+            if not embed_config:
+                continue
+            entries.append(self._extract_media(embed_config))
+
+        return self.playlist_result(entries, playlist_id)
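Note: the shared _download_embed_config() only varies the config type in the URL, so medias and playlists come from the same endpoint. A sketch of the URL composition, with the ids taken from the tests above:

EMBED_BASE_URL = 'http://fast.wistia.com/embed/'

def embed_config_url(config_type, config_id):
    # 'media' -> .../embed/medias/<id>.json, 'playlist' -> .../embed/playlists/<id>.json
    return EMBED_BASE_URL + '%ss/%s.json' % (config_type, config_id)

print(embed_config_url('media', 'sh7fpupwlt'))
print(embed_config_url('playlist', 'aodt9etokc'))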

@@ -64,9 +64,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

     _RESERVED_NAMES = (
-        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|oops|'
-        r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
-        r'feed/(watch_later|history|subscriptions|library|trending|recommended)')
+        r'embed|e|channel|c|user|playlist|watch|w|v|results|shared|'
+        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
+        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

     _NETRC_MACHINE = 'youtube'

     # If True it will raise an error if no login info is provided
@@ -2544,7 +2544,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                         feed/|
                         (?:playlist|watch)\?.*?\blist=
                     )|
-                    (?!(%s)([/#?]|$))  # Direct URLs
+                    (?!(?:%s)\b)  # Direct URLs
                 )
                 (?P<id>[^/?\#&]+)
            ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
@@ -2813,13 +2813,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         # inline playlist with not always working continuations
         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
         'only_matching': True,
-    }
-    # TODO
-    # {
-    #     'url': 'https://www.youtube.com/TheYoungTurks/live',
-    #     'only_matching': True,
-    # }
-    ]
+    }, {
+        'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/course',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/zsecurity',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.youtube.com/NASAgovVideo/videos',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/TheYoungTurks/live',
+        'only_matching': True,
+    }]

     def _extract_channel_id(self, webpage):
         channel_id = self._html_search_meta(
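Note: on the direct-URL guard, (?!(?:%s)\b) drops the extra capture groups and lets the word boundary do the work; with 'course' removed from _RESERVED_NAMES, /course URLs now match youtube:tab, as the new tests assert. A small demo with a shortened reserved list:

import re

RESERVED = r'embed|e|channel|c|user|playlist|watch|w|v|results|shared'
direct_url_name = re.compile(r'(?!(?:%s)\b)(?P<id>[^/?#&]+)' % RESERVED)

print(bool(direct_url_name.match('TheYoungTurks')))  # True: not reserved
print(bool(direct_url_name.match('course')))         # True: no longer reserved
print(bool(direct_url_name.match('watch')))          # False: reserved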
