Merge ytdl

Up to:
[generic] Extract RSS video itunes metadata
f2c704e112

Except:
youtube_dl/extractor/tiktok.py - fb626c0586
youtube_dlc/extractor/youtube.py

Ignored:
.travis.yml
.github/ISSUE_TEMPLATE/1_broken_site.md
.github/ISSUE_TEMPLATE/2_site_support_request.md
.github/ISSUE_TEMPLATE/3_site_feature_request.md
.github/ISSUE_TEMPLATE/4_bug_report.md
.github/ISSUE_TEMPLATE/5_feature_request.md
youtube_dlc/version.py
Changelog
pull/280/head
pukkandan 5 years ago
parent 1652b626dc
commit 242148cb6f

@ -313,6 +313,7 @@
- **FrontendMasters** - **FrontendMasters**
- **FrontendMastersCourse** - **FrontendMastersCourse**
- **FrontendMastersLesson** - **FrontendMastersLesson**
- **FujiTVFODPlus7**
- **Funimation** - **Funimation**
- **Funk** - **Funk**
- **Fusion** - **Fusion**
@ -493,6 +494,7 @@
- **META** - **META**
- **metacafe** - **metacafe**
- **Metacritic** - **Metacritic**
- **mewatch**
- **Mgoon** - **Mgoon**
- **MGTV**: 芒果TV - **MGTV**: 芒果TV
- **MiaoPai** - **MiaoPai**
@ -719,6 +721,7 @@
- **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:singer**: QQ音乐 - 歌手
- **qqmusic:toplist**: QQ音乐 - 排行榜 - **qqmusic:toplist**: QQ音乐 - 排行榜
- **QuantumTV** - **QuantumTV**
- **Qub**
- **Quickline** - **Quickline**
- **QuicklineLive** - **QuicklineLive**
- **R7** - **R7**
@ -923,6 +926,7 @@
- **ThisAV** - **ThisAV**
- **ThisOldHouse** - **ThisOldHouse**
- **TikTok** - **TikTok**
- **TikTokUser** (Currently broken)
- **tinypic**: tinypic.com videos - **tinypic**: tinypic.com videos
- **TMZ** - **TMZ**
- **TMZArticle** - **TMZArticle**
@ -961,6 +965,7 @@
- **TVANouvellesArticle** - **TVANouvellesArticle**
- **TVC** - **TVC**
- **TVCArticle** - **TVCArticle**
- **TVer**
- **tvigle**: Интернет-телевидение Tvigle.ru - **tvigle**: Интернет-телевидение Tvigle.ru
- **tvland.com** - **tvland.com**
- **TVN24** - **TVN24**
@ -1142,6 +1147,8 @@
- **yahoo:japannews**: Yahoo! Japan News - **yahoo:japannews**: Yahoo! Japan News
- **YandexDisk** - **YandexDisk**
- **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:album**: Яндекс.Музыка - Альбом
- **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы
- **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки
- **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист
- **yandexmusic:track**: Яндекс.Музыка - Трек - **yandexmusic:track**: Яндекс.Музыка - Трек
- **YandexVideo** - **YandexVideo**

@ -1679,7 +1679,7 @@ class YoutubeDL(object):
if req_format is None: if req_format is None:
req_format = self._default_format_spec(info_dict, download=download) req_format = self._default_format_spec(info_dict, download=download)
if self.params.get('verbose'): if self.params.get('verbose'):
self.to_stdout('[debug] Default format spec: %s' % req_format) self._write_string('[debug] Default format spec: %s\n' % req_format)
format_selector = self.build_format_selector(req_format) format_selector = self.build_format_selector(req_format)
@ -1889,7 +1889,7 @@ class YoutubeDL(object):
for ph in self._progress_hooks: for ph in self._progress_hooks:
fd.add_progress_hook(ph) fd.add_progress_hook(ph)
if self.params.get('verbose'): if self.params.get('verbose'):
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info, subtitle) return fd.download(name, info, subtitle)
subtitles_are_requested = any([self.params.get('writesubtitles', False), subtitles_are_requested = any([self.params.get('writesubtitles', False),

@ -2615,33 +2615,32 @@ class InfoExtractor(object):
hls_host = hosts.get('hls') hls_host = hosts.get('hls')
if hls_host: if hls_host:
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url) m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
http_host = hosts.get('http') http_host = hosts.get('http')
if http_host and 'hdnea=' not in manifest_url: if http_host and m3u8_formats and 'hdnea=' not in m3u8_url:
REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+' REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',') qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
qualities_length = len(qualities) qualities_length = len(qualities)
if len(formats) in (qualities_length + 1, qualities_length * 2 + 1): if len(m3u8_formats) in (qualities_length, qualities_length + 1):
i = 0 i = 0
http_formats = [] for f in m3u8_formats:
for f in formats: if f['vcodec'] != 'none':
if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none':
for protocol in ('http', 'https'): for protocol in ('http', 'https'):
http_f = f.copy() http_f = f.copy()
del http_f['manifest_url'] del http_f['manifest_url']
http_url = re.sub( http_url = re.sub(
REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url']) REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
http_f.update({ http_f.update({
'format_id': http_f['format_id'].replace('hls-', protocol + '-'), 'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
'url': http_url, 'url': http_url,
'protocol': protocol, 'protocol': protocol,
}) })
http_formats.append(http_f) formats.append(http_f)
i += 1 i += 1
formats.extend(http_formats)
return formats return formats

@ -10,6 +10,8 @@ from ..utils import (
find_xpath_attr, find_xpath_attr,
get_element_by_class, get_element_by_class,
int_or_none, int_or_none,
js_to_json,
merge_dicts,
smuggle_url, smuggle_url,
unescapeHTML, unescapeHTML,
) )
@ -98,6 +100,26 @@ class CSpanIE(InfoExtractor):
bc_attr['data-bcid']) bc_attr['data-bcid'])
return self.url_result(smuggle_url(bc_url, {'source_url': url})) return self.url_result(smuggle_url(bc_url, {'source_url': url}))
def add_referer(formats):
for f in formats:
f.setdefault('http_headers', {})['Referer'] = url
# As of 01.12.2020 this path looks to cover all cases making the rest
# of the code unnecessary
jwsetup = self._parse_json(
self._search_regex(
r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if jwsetup:
info = self._parse_jwplayer_data(
jwsetup, video_id, require_title=False, m3u8_id='hls',
base_url=url)
add_referer(info['formats'])
ld_info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts(info, ld_info)
# Obsolete
# We first look for clipid, because clipprog always appears before # We first look for clipid, because clipprog always appears before
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
results = list(filter(None, (re.search(p, webpage) for p in patterns))) results = list(filter(None, (re.search(p, webpage) for p in patterns)))
@ -165,6 +187,7 @@ class CSpanIE(InfoExtractor):
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
path, video_id, 'mp4', entry_protocol='m3u8_native', path, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }] m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
add_referer(formats)
self._sort_formats(formats) self._sort_formats(formats)
entries.append({ entries.append({
'id': '%s_%d' % (video_id, partnum + 1), 'id': '%s_%d' % (video_id, partnum + 1),

@ -29,7 +29,7 @@ class DRTVIE(InfoExtractor):
https?:// https?://
(?: (?:
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*| (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/ (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
) )
(?P<id>[\da-z_-]+) (?P<id>[\da-z_-]+)
''' '''
@ -111,6 +111,9 @@ class DRTVIE(InfoExtractor):
}, { }, {
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769', 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.dr.dk/drtv/program/jagten_220924',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

@ -409,6 +409,7 @@ from .frontendmasters import (
FrontendMastersLessonIE, FrontendMastersLessonIE,
FrontendMastersCourseIE FrontendMastersCourseIE
) )
from .fujitv import FujiTVFODPlus7IE
from .funimation import FunimationIE from .funimation import FunimationIE
from .funk import FunkIE from .funk import FunkIE
from .fusion import FusionIE from .fusion import FusionIE
@ -1220,7 +1221,10 @@ from .tnaflix import (
EMPFlixIE, EMPFlixIE,
MovieFapIE, MovieFapIE,
) )
from .toggle import ToggleIE from .toggle import (
ToggleIE,
MeWatchIE,
)
from .tonline import TOnlineIE from .tonline import TOnlineIE
from .toongoggles import ToonGogglesIE from .toongoggles import ToonGogglesIE
from .toutv import TouTvIE from .toutv import TouTvIE
@ -1253,7 +1257,10 @@ from .tv2dk import (
from .tv2hu import TV2HuIE from .tv2hu import TV2HuIE
from .tv4 import TV4IE from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE from .tv5mondeplus import TV5MondePlusIE
from .tva import TVAIE from .tva import (
TVAIE,
QubIE,
)
from .tvanouvelles import ( from .tvanouvelles import (
TVANouvellesIE, TVANouvellesIE,
TVANouvellesArticleIE, TVANouvellesArticleIE,
@ -1262,6 +1269,7 @@ from .tvc import (
TVCIE, TVCIE,
TVCArticleIE, TVCArticleIE,
) )
from .tver import TVerIE
from .tvigle import TvigleIE from .tvigle import TvigleIE
from .tvland import TVLandIE from .tvland import TVLandIE
from .tvn24 import TVN24IE from .tvn24 import TVN24IE
@ -1515,6 +1523,8 @@ from .yandexmusic import (
YandexMusicTrackIE, YandexMusicTrackIE,
YandexMusicAlbumIE, YandexMusicAlbumIE,
YandexMusicPlaylistIE, YandexMusicPlaylistIE,
YandexMusicArtistTracksIE,
YandexMusicArtistAlbumsIE,
) )
from .yandexvideo import YandexVideoIE from .yandexvideo import YandexVideoIE
from .yapfiles import YapFilesIE from .yapfiles import YapFilesIE

@ -0,0 +1,35 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class FujiTVFODPlus7IE(InfoExtractor):
    """Extractor for URLs of the form i.fod.fujitv.co.jp/plus7/web/<4 chars>/<id>."""

    _VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'http://i.fod.fujitv.co.jp/'
    # Maps a format's 'tbr' value to the (width, height) assigned to it.
    _BITRATE_MAP = {
        300: (320, 180),
        800: (640, 360),
        1200: (1280, 720),
        2000: (1280, 720),
    }

    def _real_extract(self, url):
        """Build the info dict for *url* from the m3u8 manifest derived from its id."""
        video_id = self._match_id(url)

        manifest_url = self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id
        formats = self._extract_m3u8_formats(manifest_url, video_id)

        # Formats whose 'tbr' is not a key of _BITRATE_MAP are left untouched.
        for fmt in formats:
            dimensions = self._BITRATE_MAP.get(fmt.get('tbr'))
            if not dimensions:
                continue
            width, height = dimensions[0], dimensions[1]
            fmt.update({
                'width': width,
                'height': height,
            })
        self._sort_formats(formats)

        thumbnail_url = self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id
        # The video id doubles as the title; no other title is looked up here.
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'thumbnail': thumbnail_url,
        }

@ -1,16 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .once import OnceIE from .once import OnceIE
from ..compat import ( from ..compat import compat_urllib_parse_unquote
compat_urllib_parse_unquote,
)
from ..utils import (
unescapeHTML,
url_basename,
dict_get,
)
class GameSpotIE(OnceIE): class GameSpotIE(OnceIE):
@ -24,17 +15,16 @@ class GameSpotIE(OnceIE):
'title': 'Arma 3 - Community Guide: SITREP I', 'title': 'Arma 3 - Community Guide: SITREP I',
'description': 'Check out this video where some of the basics of Arma 3 is explained.', 'description': 'Check out this video where some of the basics of Arma 3 is explained.',
}, },
'skip': 'manifest URL give HTTP Error 404: Not Found',
}, { }, {
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/', 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
'md5': '173ea87ad762cf5d3bf6163dceb255a6',
'info_dict': { 'info_dict': {
'id': 'gs-2300-6424837', 'id': 'gs-2300-6424837',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Now Playing - The Witcher 3: Wild Hunt', 'title': 'Now Playing - The Witcher 3: Wild Hunt',
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.', 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
}, },
'params': {
'skip_download': True, # m3u8 downloads
},
}, { }, {
'url': 'https://www.gamespot.com/videos/embed/6439218/', 'url': 'https://www.gamespot.com/videos/embed/6439218/',
'only_matching': True, 'only_matching': True,
@ -49,90 +39,40 @@ class GameSpotIE(OnceIE):
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
data_video_json = self._search_regex( data_video = self._parse_json(self._html_search_regex(
r'data-video=["\'](.*?)["\']', webpage, 'data video') r'data-video=(["\'])({.*?})\1', webpage,
data_video = self._parse_json(unescapeHTML(data_video_json), page_id) 'video data', group=2), page_id)
title = compat_urllib_parse_unquote(data_video['title'])
streams = data_video['videoStreams'] streams = data_video['videoStreams']
manifest_url = None
formats = [] formats = []
f4m_url = streams.get('f4m_stream')
if f4m_url: m3u8_url = streams.get('adaptive_stream')
manifest_url = f4m_url
formats.extend(self._extract_f4m_formats(
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
if m3u8_url: if m3u8_url:
manifest_url = m3u8_url
m3u8_formats = self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_url, page_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False) m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats) for f in m3u8_formats:
progressive_url = dict_get( formats.append(f)
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr')) http_f = f.copy()
if progressive_url and manifest_url: del http_f['manifest_url']
qualities_basename = self._search_regex( http_f.update({
r'/([^/]+)\.csmil/', 'format_id': f['format_id'].replace('hls-', 'http-'),
manifest_url, 'qualities basename', default=None) 'protocol': 'http',
if qualities_basename: 'url': f['url'].replace('.m3u8', '.mp4'),
QUALITIES_RE = r'((,\d+)+,?)' })
qualities = self._search_regex( formats.append(http_f)
QUALITIES_RE, qualities_basename,
'qualities', default=None)
if qualities:
qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
qualities.sort()
http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
http_url_basename = url_basename(progressive_url)
if m3u8_formats:
self._sort_formats(m3u8_formats)
m3u8_formats = list(filter(
lambda f: f.get('vcodec') != 'none', m3u8_formats))
if len(qualities) == len(m3u8_formats):
for q, m3u8_format in zip(qualities, m3u8_formats):
f = m3u8_format.copy()
f.update({
'url': progressive_url.replace(
http_url_basename, http_template % q),
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
else:
for q in qualities:
formats.append({
'url': progressive_url.replace(
http_url_basename, http_template % q),
'ext': 'mp4',
'format_id': 'http-%d' % q,
'tbr': q,
})
onceux_json = self._search_regex( mpd_url = streams.get('adaptive_dash')
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None) if mpd_url:
if onceux_json: formats.extend(self._extract_mpd_formats(
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') mpd_url, page_id, mpd_id='dash', fatal=False))
if onceux_url:
formats.extend(self._extract_once_formats(re.sub(
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
http_formats_preference=-1))
if not formats:
for quality in ['sd', 'hd']:
# It's actually a link to a flv file
flv_url = streams.get('f4m_{0}'.format(quality))
if flv_url is not None:
formats.append({
'url': flv_url,
'ext': 'flv',
'format_id': quality,
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': data_video['guid'], 'id': data_video.get('guid') or page_id,
'display_id': page_id, 'display_id': page_id,
'title': compat_urllib_parse_unquote(data_video['title']), 'title': title,
'formats': formats, 'formats': formats,
'description': self._html_search_meta('description', webpage), 'description': self._html_search_meta('description', webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),

@ -201,11 +201,19 @@ class GenericIE(InfoExtractor):
{ {
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'info_dict': { 'info_dict': {
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', 'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
'ext': 'm4v', 'title': 'MSNBC Rachel Maddow (video)',
'upload_date': '20150228', 'description': 're:.*her unique approach to storytelling.*',
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', },
} 'playlist': [{
'info_dict': {
'ext': 'mov',
'id': 'pdv_maddow_netcast_mov-12-03-2020-223726',
'title': 'MSNBC Rachel Maddow (video) - 12-03-2020-223726',
'description': 're:.*her unique approach to storytelling.*',
'upload_date': '20201204',
},
}],
}, },
# RSS feed with enclosures and unsupported link URLs # RSS feed with enclosures and unsupported link URLs
{ {
@ -2202,6 +2210,7 @@ class GenericIE(InfoExtractor):
'_type': 'url_transparent', '_type': 'url_transparent',
'url': next_url, 'url': next_url,
'title': it.find('title').text, 'title': it.find('title').text,
'description': xpath_text(it, 'description', default=None),
}) })
return { return {

@ -23,7 +23,7 @@ class MediasetIE(ThePlatformBaseIE):
https?:// https?://
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
(?: (?:
(?:video|on-demand)/(?:[^/]+/)+[^/]+_| (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
player/index\.html\?.*?\bprogramGuid= player/index\.html\?.*?\bprogramGuid=
) )
)(?P<id>[0-9A-Z]{16,}) )(?P<id>[0-9A-Z]{16,})
@ -88,6 +88,9 @@ class MediasetIE(ThePlatformBaseIE):
}, { }, {
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135', 'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
'only_matching': True,
}] }]
@staticmethod @staticmethod

@ -10,7 +10,7 @@ class NhkVodIE(InfoExtractor):
# Content available only for a limited period of time. Visit # Content available only for a limited period of time. Visit
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
_TESTS = [{ _TESTS = [{
# clip # video clip
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
'md5': '256a1be14f48d960a7e61e2532d95ec3', 'md5': '256a1be14f48d960a7e61e2532d95ec3',
'info_dict': { 'info_dict': {
@ -21,6 +21,19 @@ class NhkVodIE(InfoExtractor):
'timestamp': 1565965194, 'timestamp': 1565965194,
'upload_date': '20190816', 'upload_date': '20190816',
}, },
}, {
# audio clip
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
'info_dict': {
'id': 'r_inventions-20201104-1-en',
'ext': 'm4a',
'title': "Japan's Top Inventions - Miniature Video Cameras",
'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
},
'params': {
# m3u8 download
'skip_download': True,
},
}, { }, {
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
'only_matching': True, 'only_matching': True,

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import itertools
import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
@ -12,17 +14,57 @@ from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
js_to_json,
NO_DEFAULT,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
try_get, try_get,
urljoin,
url_or_none, url_or_none,
) )
class NRKBaseIE(InfoExtractor): class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO'] _GEO_COUNTRIES = ['NO']
_CDN_REPL_REGEX = r'''(?x)://
(?:
nrkod\d{1,2}-httpcache0-47115-cacheod0\.dna\.ip-only\.net/47115-cacheod0|
nrk-od-no\.telenorcdn\.net|
minicdn-od\.nrk\.no/od/nrkhd-osl-rr\.netwerk\.no/no
)/'''
def _extract_nrk_formats(self, asset_url, video_id):
    """Return the formats extracted for *asset_url*.

    URLs under ``*.akamaihd.net/i/`` are passed to the Akamai extractor
    after the ``b=<n>-<n>`` and ``__a__=off`` query parameters are removed.
    Any other URL is read as an HLS manifest; if that yields nothing and the
    URL matches ``_CDN_REPL_REGEX``, the matched host part is replaced with a
    randomly numbered ``nrk-od-NN.akamaized.net`` host and extraction is
    attempted once more.
    """
    if re.match(r'https?://[^/]+\.akamaihd\.net/i/', asset_url):
        cleaned_url = re.sub(r'(?:b=\d+-\d+|__a__=off)&?', '', asset_url)
        return self._extract_akamai_formats(cleaned_url, video_id)

    def fetch_hls(manifest_url):
        # Non-fatal: the call is made with fatal=False so the retry below can run.
        return self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', 'm3u8_native', fatal=False)

    asset_url = re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url)
    formats = fetch_hls(asset_url)
    if formats or not re.search(self._CDN_REPL_REGEX, asset_url):
        return formats

    # Nothing was extracted from a known CDN host: retry through akamaized.net.
    fallback_url = re.sub(
        self._CDN_REPL_REGEX,
        '://nrk-od-%02d.akamaized.net/no/' % random.randint(0, 99),
        asset_url)
    return fetch_hls(fallback_url)
def _raise_error(self, data):
    """Raise the error described by the API response *data*.

    Calls ``raise_geo_restricted`` when the message type contains
    ``IsGeoBlocked`` or ``data['usageRights']['isGeoBlocked']`` is ``True``.
    Otherwise raises an expected ``ExtractorError`` whose text is the
    ``endUserMessage``, the known message for the type, or the raw type.
    """
    MESSAGES = {
        'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
        'ProgramRightsHasExpired': 'Programmet har gått ut',
        'NoProgramRights': 'Ikke tilgjengelig',
        'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
    }
    message_type = data.get('messageType', '')

    # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
    geo_blocked = 'IsGeoBlocked' in message_type
    if not geo_blocked:
        # Only consulted when the message type itself does not say so.
        geo_blocked = try_get(
            data, lambda x: x['usageRights']['isGeoBlocked']) is True
    if geo_blocked:
        self.raise_geo_restricted(
            msg=MESSAGES.get('ProgramIsGeoBlocked'),
            countries=self._GEO_COUNTRIES)

    message = data.get('endUserMessage')
    if not message:
        # Unknown message types are reported verbatim.
        message = MESSAGES.get(message_type, message_type)
    raise ExtractorError(
        '%s said: %s' % (self.IE_NAME, message), expected=True)
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
    """Download JSON from *path* resolved against ``http://psapi.nrk.no/``.

    *note* is the progress message; when it is falsy the message is
    ``'Downloading %s JSON'`` formatted with *item*. *fatal* and *query*
    are forwarded unchanged to ``_download_json``.
    """
    api_url = urljoin('http://psapi.nrk.no/', path)
    if not note:
        note = 'Downloading %s JSON' % item
    return self._download_json(
        api_url, video_id, note, fatal=fatal, query=query)
class NRKIE(NRKBaseIE): class NRKIE(NRKBaseIE):
@ -41,7 +83,7 @@ class NRKIE(NRKBaseIE):
_TESTS = [{ _TESTS = [{
# video # video
'url': 'http://www.nrk.no/video/PS*150533', 'url': 'http://www.nrk.no/video/PS*150533',
'md5': '706f34cdf1322577589e369e522b50ef', 'md5': 'f46be075326e23ad0e524edfcb06aeb6',
'info_dict': { 'info_dict': {
'id': '150533', 'id': '150533',
'ext': 'mp4', 'ext': 'mp4',
@ -55,7 +97,7 @@ class NRKIE(NRKBaseIE):
# MD5 is unstable # MD5 is unstable
'info_dict': { 'info_dict': {
'id': '154915', 'id': '154915',
'ext': 'flv', 'ext': 'mp4',
'title': 'Slik høres internett ut når du er blind', 'title': 'Slik høres internett ut når du er blind',
'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
'duration': 20, 'duration': 20,
@ -78,9 +120,15 @@ class NRKIE(NRKBaseIE):
}] }]
def _extract_from_playback(self, video_id): def _extract_from_playback(self, video_id):
manifest = self._download_json( path_templ = 'playback/%s/' + video_id
'http://psapi.nrk.no/playback/manifest/%s' % video_id,
video_id, 'Downloading manifest JSON') def call_playback_api(item, query=None):
return self._call_api(path_templ % item, video_id, item, query=query)
# known values for preferredCdn: akamai, iponly, minicdn and telenor
manifest = call_playback_api('manifest', {'preferredCdn': 'akamai'})
if manifest.get('playability') == 'nonPlayable':
self._raise_error(manifest['nonPlayable'])
playable = manifest['playable'] playable = manifest['playable']
@ -94,14 +142,10 @@ class NRKIE(NRKBaseIE):
if not format_url: if not format_url:
continue continue
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8': if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_nrk_formats(format_url, video_id))
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
data = self._download_json( data = call_playback_api('metadata')
'http://psapi.nrk.no/playback/metadata/%s' % video_id,
video_id, 'Downloading metadata JSON')
preplay = data['preplay'] preplay = data['preplay']
titles = preplay['titles'] titles = preplay['titles']
@ -143,29 +187,22 @@ class NRKIE(NRKBaseIE):
class NRKTVIE(NRKBaseIE): class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio' IE_DESC = 'NRK TV and NRK Radio'
_EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})' _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
_VALID_URL = r'''(?x) _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE
https?://
(?:tv|radio)\.nrk(?:super)?\.no/
(?:serie(?:/[^/]+){1,2}|program)/
(?![Ee]pisodes)%s
(?:/\d{2}-\d{2}-\d{4})?
(?:\#del=(?P<part_id>\d+))?
''' % _EPISODE_RE
_API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
_TESTS = [{ _TESTS = [{
'url': 'https://tv.nrk.no/program/MDDP12000117', 'url': 'https://tv.nrk.no/program/MDDP12000117',
'md5': '8270824df46ec629b66aeaa5796b36fb', 'md5': 'c4a5960f1b00b40d47db65c1064e0ab1',
'info_dict': { 'info_dict': {
'id': 'MDDP12000117AA', 'id': 'MDDP12000117AA',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Alarm Trolltunga', 'title': 'Alarm Trolltunga',
'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce', 'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
'duration': 2223, 'duration': 2223.44,
'age_limit': 6, 'age_limit': 6,
}, },
}, { }, {
'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
'md5': '9a167e54d04671eb6317a37b7bc8a280', 'md5': '8d40dab61cea8ab0114e090b029a0565',
'info_dict': { 'info_dict': {
'id': 'MUHH48000314AA', 'id': 'MUHH48000314AA',
'ext': 'mp4', 'ext': 'mp4',
@ -175,7 +212,6 @@ class NRKTVIE(NRKBaseIE):
'series': '20 spørsmål', 'series': '20 spørsmål',
'episode': '23.05.2014', 'episode': '23.05.2014',
}, },
'skip': 'NoProgramRights',
}, { }, {
'url': 'https://tv.nrk.no/program/mdfp15000514', 'url': 'https://tv.nrk.no/program/mdfp15000514',
'info_dict': { 'info_dict': {
@ -183,7 +219,7 @@ class NRKTVIE(NRKBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014', 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db', 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
'duration': 4605, 'duration': 4605.08,
'series': 'Kunnskapskanalen', 'series': 'Kunnskapskanalen',
'episode': '24.05.2014', 'episode': '24.05.2014',
}, },
@ -194,51 +230,25 @@ class NRKTVIE(NRKBaseIE):
# single playlist video # single playlist video
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
'info_dict': { 'info_dict': {
'id': 'MSPO40010515-part2', 'id': 'MSPO40010515AH',
'ext': 'flv', 'ext': 'mp4',
'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 'description': 'md5:c03aba1e917561eface5214020551b7a',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'expected_warnings': ['Video is geo restricted'], 'expected_warnings': ['Failed to download m3u8 information'],
'skip': 'particular part is not supported currently', 'skip': 'particular part is not supported currently',
}, { }, {
'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
'playlist': [{
'info_dict': {
'id': 'MSPO40010515AH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 772,
'series': 'Tour de Ski',
'episode': '06.01.2015',
},
'params': {
'skip_download': True,
},
}, {
'info_dict': {
'id': 'MSPO40010515BH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
'duration': 6175,
'series': 'Tour de Ski',
'episode': '06.01.2015',
},
'params': {
'skip_download': True,
},
}],
'info_dict': { 'info_dict': {
'id': 'MSPO40010515', 'id': 'MSPO40010515AH',
'ext': 'mp4',
'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015', 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
'description': 'md5:1f97a41f05a9486ee00c56f35f82993d', 'description': 'md5:c03aba1e917561eface5214020551b7a',
}, },
'expected_warnings': ['Video is geo restricted'], 'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13', 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
'info_dict': { 'info_dict': {
@ -269,12 +279,16 @@ class NRKTVIE(NRKBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'ProgramRightsHasExpired',
}, { }, {
'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller', 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201507/NPUB21019315',
'only_matching': True,
}] }]
_api_host = None _api_host = None
@ -295,6 +309,7 @@ class NRKTVIE(NRKBaseIE):
title = data.get('fullTitle') or data.get('mainTitle') or data['title'] title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id video_id = data.get('id') or video_id
urls = []
entries = [] entries = []
conviva = data.get('convivaStatistics') or {} conviva = data.get('convivaStatistics') or {}
@ -311,19 +326,14 @@ class NRKTVIE(NRKBaseIE):
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
for num, asset in enumerate(media_assets, 1): for num, asset in enumerate(media_assets, 1):
asset_url = asset.get('url') asset_url = asset.get('url')
if not asset_url: if not asset_url or asset_url in urls:
continue continue
formats = self._extract_akamai_formats(asset_url, video_id) urls.append(asset_url)
formats = self._extract_nrk_formats(asset_url, video_id)
if not formats: if not formats:
continue continue
self._sort_formats(formats) self._sort_formats(formats)
# Some f4m streams may not work with hdcore in fragments' URLs
for f in formats:
extra_param = f.get('extra_param_to_segment_url')
if extra_param and 'hdcore' in extra_param:
del f['extra_param_to_segment_url']
entry_id, entry_title = video_id_and_title(num) entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration')) duration = parse_duration(asset.get('duration'))
subtitles = {} subtitles = {}
@ -339,38 +349,26 @@ class NRKTVIE(NRKBaseIE):
'duration': duration, 'duration': duration,
'subtitles': subtitles, 'subtitles': subtitles,
'formats': formats, 'formats': formats,
'is_live': live,
}) })
if not entries: if not entries:
media_url = data.get('mediaUrl') media_url = data.get('mediaUrl')
if media_url: if media_url and media_url not in urls:
formats = self._extract_akamai_formats(media_url, video_id) formats = self._extract_nrk_formats(media_url, video_id)
self._sort_formats(formats) if formats:
duration = parse_duration(data.get('duration')) self._sort_formats(formats)
entries = [{ duration = parse_duration(data.get('duration'))
'id': video_id, entries = [{
'title': make_title(title), 'id': video_id,
'duration': duration, 'title': make_title(title),
'formats': formats, 'duration': duration,
}] 'formats': formats,
'is_live': live,
}]
if not entries: if not entries:
MESSAGES = { self._raise_error(data)
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
expected=True)
series = conviva.get('seriesName') or data.get('seriesTitle') series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
@ -512,57 +510,98 @@ class NRKTVEpisodeIE(InfoExtractor):
return info return info
class NRKTVSerieBaseIE(InfoExtractor): class NRKTVSerieBaseIE(NRKBaseIE):
def _extract_series(self, webpage, display_id, fatal=True):
config = self._parse_json(
self._search_regex(
(r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
display_id, fatal=False, transform_source=js_to_json)
if not config:
return
return try_get(
config,
(lambda x: x['initialState']['series'], lambda x: x['series']),
dict)
def _extract_seasons(self, seasons):
if not isinstance(seasons, list):
return []
entries = []
for season in seasons:
entries.extend(self._extract_episodes(season))
return entries
def _extract_episodes(self, season):
if not isinstance(season, dict):
return []
return self._extract_entries(season.get('episodes'))
def _extract_entries(self, entry_list): def _extract_entries(self, entry_list):
if not isinstance(entry_list, list): if not isinstance(entry_list, list):
return [] return []
entries = [] entries = []
for episode in entry_list: for episode in entry_list:
nrk_id = episode.get('prfId') nrk_id = episode.get('prfId') or episode.get('episodeId')
if not nrk_id or not isinstance(nrk_id, compat_str): if not nrk_id or not isinstance(nrk_id, compat_str):
continue continue
if not re.match(NRKTVIE._EPISODE_RE, nrk_id):
continue
entries.append(self.url_result( entries.append(self.url_result(
'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)) 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
return entries return entries
_ASSETS_KEYS = ('episodes', 'instalments',)
def _extract_assets_key(self, embedded):
for asset_key in self._ASSETS_KEYS:
if embedded.get(asset_key):
return asset_key
def _entries(self, data, display_id):
for page_num in itertools.count(1):
embedded = data.get('_embedded') or data
if not isinstance(embedded, dict):
break
assets_key = self._extract_assets_key(embedded)
if not assets_key:
break
# Extract entries
entries = try_get(
embedded,
(lambda x: x[assets_key]['_embedded'][assets_key],
lambda x: x[assets_key]),
list)
for e in self._extract_entries(entries):
yield e
# Find next URL
next_url_path = try_get(
data,
(lambda x: x['_links']['next']['href'],
lambda x: x['_embedded'][assets_key]['_links']['next']['href']),
compat_str)
if not next_url_path:
break
data = self._call_api(
next_url_path, display_id,
note='Downloading %s JSON page %d' % (assets_key, page_num),
fatal=False)
if not data:
break
class NRKTVSeasonIE(NRKTVSerieBaseIE): class NRKTVSeasonIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)' _VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'https://tv.nrk.no/serie/backstage/sesong/1', 'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
'info_dict': { 'info_dict': {
'id': '1', 'id': 'backstage/1',
'title': 'Sesong 1', 'title': 'Sesong 1',
}, },
'playlist_mincount': 30, 'playlist_mincount': 30,
} }, {
# no /sesong/ in path
'url': 'https://tv.nrk.no/serie/lindmo/2016',
'info_dict': {
'id': 'lindmo/2016',
'title': '2016',
},
'playlist_mincount': 29,
}, {
# weird nested _embedded in catalog JSON response
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens/sesong/1',
'info_dict': {
'id': 'dickie-dick-dickens/1',
'title': 'Sesong 1',
},
'playlist_mincount': 11,
}, {
# 841 entries, multi page
'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201509',
'info_dict': {
'id': 'dagsnytt/201509',
'title': 'September 2015',
},
'playlist_mincount': 841,
}, {
# 180 entries, single page
'url': 'https://tv.nrk.no/serie/spangas/sesong/1',
'only_matching': True,
}]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
@ -570,25 +609,35 @@ class NRKTVSeasonIE(NRKTVSerieBaseIE):
else super(NRKTVSeasonIE, cls).suitable(url)) else super(NRKTVSeasonIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) domain, serie, season_id = re.match(self._VALID_URL, url).groups()
display_id = '%s/%s' % (serie, season_id)
webpage = self._download_webpage(url, display_id)
series = self._extract_series(webpage, display_id) data = self._call_api(
'%s/catalog/series/%s/seasons/%s' % (domain, serie, season_id),
display_id, 'season', query={'pageSize': 50})
season = next( title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id
s for s in series['seasons']
if int(display_id) == s.get('seasonNumber'))
title = try_get(season, lambda x: x['titles']['title'], compat_str)
return self.playlist_result( return self.playlist_result(
self._extract_episodes(season), display_id, title) self._entries(data, display_id),
display_id, title)
class NRKTVSeriesIE(NRKTVSerieBaseIE): class NRKTVSeriesIE(NRKTVSerieBaseIE):
_VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' _VALID_URL = r'https?://(?P<domain>(?:tv|radio)\.nrk|(?:tv\.)?nrksuper)\.no/serie/(?P<id>[^/]+)'
_ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
'id': 'groenn-glede',
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
'playlist_mincount': 90,
}, {
# new layout, instalments, more entries
'url': 'https://tv.nrk.no/serie/lindmo',
'only_matching': True,
}, {
'url': 'https://tv.nrk.no/serie/blank', 'url': 'https://tv.nrk.no/serie/blank',
'info_dict': { 'info_dict': {
'id': 'blank', 'id': 'blank',
@ -602,25 +651,16 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
'info_dict': { 'info_dict': {
'id': 'backstage', 'id': 'backstage',
'title': 'Backstage', 'title': 'Backstage',
'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3', 'description': 'md5:63692ceb96813d9a207e9910483d948b',
}, },
'playlist_mincount': 60, 'playlist_mincount': 60,
}, {
# new layout, instalments
'url': 'https://tv.nrk.no/serie/groenn-glede',
'info_dict': {
'id': 'groenn-glede',
'title': 'Grønn glede',
'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
},
'playlist_mincount': 10,
}, { }, {
# old layout # old layout
'url': 'https://tv.nrksuper.no/serie/labyrint', 'url': 'https://tv.nrksuper.no/serie/labyrint',
'info_dict': { 'info_dict': {
'id': 'labyrint', 'id': 'labyrint',
'title': 'Labyrint', 'title': 'Labyrint',
'description': 'md5:318b597330fdac5959247c9b69fdb1ec', 'description': 'I Daidalos sin undersjøiske Labyrint venter spennende oppgaver, skumle robotskapninger og slim.',
}, },
'playlist_mincount': 3, 'playlist_mincount': 3,
}, { }, {
@ -632,6 +672,17 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
}, { }, {
'url': 'https://tv.nrk.no/serie/postmann-pat', 'url': 'https://tv.nrk.no/serie/postmann-pat',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://radio.nrk.no/serie/dickie-dick-dickens',
'info_dict': {
'id': 'dickie-dick-dickens',
'title': 'Dickie Dick Dickens',
'description': 'md5:19e67411ffe57f7dce08a943d7a0b91f',
},
'playlist_mincount': 8,
}, {
'url': 'https://nrksuper.no/serie/labyrint',
'only_matching': True,
}] }]
@classmethod @classmethod
@ -642,43 +693,42 @@ class NRKTVSeriesIE(NRKTVSerieBaseIE):
else super(NRKTVSeriesIE, cls).suitable(url)) else super(NRKTVSeriesIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
series_id = self._match_id(url) site, series_id = re.match(self._VALID_URL, url).groups()
is_radio = site == 'radio.nrk'
webpage = self._download_webpage(url, series_id) domain = 'radio' if is_radio else 'tv'
# New layout (e.g. https://tv.nrk.no/serie/backstage) size_prefix = 'p' if is_radio else 'embeddedInstalmentsP'
series = self._extract_series(webpage, series_id, fatal=False) series = self._call_api(
if series: '%s/catalog/series/%s' % (domain, series_id),
title = try_get(series, lambda x: x['titles']['title'], compat_str) series_id, 'serie', query={size_prefix + 'ageSize': 50})
description = try_get( titles = try_get(series, [
series, lambda x: x['titles']['subtitle'], compat_str) lambda x: x['titles'],
entries = [] lambda x: x[x['type']]['titles'],
entries.extend(self._extract_seasons(series.get('seasons'))) lambda x: x[x['seriesType']]['titles'],
entries.extend(self._extract_entries(series.get('instalments'))) ]) or {}
entries.extend(self._extract_episodes(series.get('extraMaterial')))
return self.playlist_result(entries, series_id, title, description)
# Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
entries = [
self.url_result(
'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
series=series_id, season=season_id))
for season_id in re.findall(self._ITEM_RE, webpage)
]
title = self._html_search_meta( entries = []
'seriestitle', webpage, entries.extend(self._entries(series, series_id))
'title', default=None) or self._og_search_title( embedded = series.get('_embedded') or {}
webpage, fatal=False) linked_seasons = try_get(series, lambda x: x['_links']['seasons']) or []
if title: embedded_seasons = embedded.get('seasons') or []
title = self._search_regex( if len(linked_seasons) > len(embedded_seasons):
r'NRK (?:Super )?TV\s*[-]\s*(.+)', title, 'title', default=title) for season in linked_seasons:
season_name = season.get('name')
description = self._html_search_meta( if season_name and isinstance(season_name, compat_str):
'series_description', webpage, entries.append(self.url_result(
'description', default=None) or self._og_search_description(webpage) 'https://%s.nrk.no/serie/%s/sesong/%s'
% (domain, series_id, season_name),
ie=NRKTVSeasonIE.ie_key(),
video_title=season.get('title')))
else:
for season in embedded_seasons:
entries.extend(self._entries(season, series_id))
entries.extend(self._entries(
embedded.get('extraMaterial') or {}, series_id))
return self.playlist_result(entries, series_id, title, description) return self.playlist_result(
entries, series_id, titles.get('title'), titles.get('subtitle'))
class NRKTVDirekteIE(NRKTVIE): class NRKTVDirekteIE(NRKTVIE):
@ -782,14 +832,8 @@ class NRKSkoleIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( nrk_id = self._download_json(
'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id, 'https://nrkno-skole-prod.kube.nrk.no/skole/api/media/%s' % video_id,
video_id) video_id)['psId']
nrk_id = self._parse_json(
self._search_regex(
r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>',
webpage, 'application json'),
video_id)['activeMedia']['psId']
return self.url_result('nrk:%s' % nrk_id) return self.url_result('nrk:%s' % nrk_id)

@ -541,6 +541,10 @@ class PeerTubeIE(InfoExtractor):
'format_id': format_id, 'format_id': format_id,
'filesize': file_size, 'filesize': file_size,
}) })
if format_id == '0p':
f['vcodec'] = 'none'
else:
f['fps'] = int_or_none(file_.get('fps'))
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)

@ -31,7 +31,12 @@ class PornHubBaseIE(InfoExtractor):
def dl(*args, **kwargs): def dl(*args, **kwargs):
return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
webpage, urlh = dl(*args, **kwargs) ret = dl(*args, **kwargs)
if not ret:
return ret
webpage, urlh = ret
if any(re.search(p, webpage) for p in ( if any(re.search(p, webpage) for p in (
r'<body\b[^>]+\bonload=["\']go\(\)', r'<body\b[^>]+\bonload=["\']go\(\)',
@ -53,7 +58,7 @@ class PornHubIE(PornHubBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/ (?:www\.)?thumbzilla\.com/video/
) )
(?P<id>[\da-z]+) (?P<id>[\da-z]+)
@ -152,6 +157,9 @@ class PornHubIE(PornHubBaseIE):
}, { }, {
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
'only_matching': True,
}, { }, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82', 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
'only_matching': True, 'only_matching': True,
@ -160,7 +168,7 @@ class PornHubIE(PornHubBaseIE):
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
return re.findall( return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)', r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)',
webpage) webpage)
def _extract_count(self, pattern, webpage, name): def _extract_count(self, pattern, webpage, name):
@ -346,9 +354,9 @@ class PornHubIE(PornHubBaseIE):
view_count = self._extract_count( view_count = self._extract_count(
r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
like_count = self._extract_count( like_count = self._extract_count(
r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
dislike_count = self._extract_count( dislike_count = self._extract_count(
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike') r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
comment_count = self._extract_count( comment_count = self._extract_count(
r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
@ -422,7 +430,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
class PornHubUserIE(PornHubPlaylistBaseIE): class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph', 'url': 'https://www.pornhub.com/model/zoe_ph',
'playlist_mincount': 118, 'playlist_mincount': 118,
@ -490,7 +498,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)' _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True, 'only_matching': True,
@ -605,7 +613,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'info_dict': { 'info_dict': {

@ -269,7 +269,7 @@ class TeachableCourseIE(TeachableBaseIE):
r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)', r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
webpage): webpage):
li = mobj.group('li') li = mobj.group('li')
if 'fa-youtube-play' not in li: if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li):
continue continue
lecture_url = self._search_regex( lecture_url = self._search_regex(
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li, r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,

@ -11,13 +11,13 @@ from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request, strip_or_none,
) )
class ToggleIE(InfoExtractor): class ToggleIE(InfoExtractor):
IE_NAME = 'toggle' IE_NAME = 'toggle'
_VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)' _VALID_URL = r'(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}|toggle:)(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115', 'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
'info_dict': { 'info_dict': {
@ -84,28 +84,12 @@ class ToggleIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
_FORMAT_PREFERENCES = {
'wvm-STBMain': -10,
'wvm-iPadMain': -20,
'wvm-iPhoneMain': -30,
'wvm-Android': -40,
}
_API_USER = 'tvpapi_147' _API_USER = 'tvpapi_147'
_API_PASS = '11111' _API_PASS = '11111'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(
url, video_id, note='Downloading video page')
api_user = self._search_regex(
r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
default=self._API_USER, group='user')
api_pass = self._search_regex(
r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
default=self._API_PASS, group='pass')
params = { params = {
'initObj': { 'initObj': {
'Locale': { 'Locale': {
@ -118,17 +102,16 @@ class ToggleIE(InfoExtractor):
'SiteGuid': 0, 'SiteGuid': 0,
'DomainID': '0', 'DomainID': '0',
'UDID': '', 'UDID': '',
'ApiUser': api_user, 'ApiUser': self._API_USER,
'ApiPass': api_pass 'ApiPass': self._API_PASS
}, },
'MediaID': video_id, 'MediaID': video_id,
'mediaType': 0, 'mediaType': 0,
} }
req = sanitized_Request( info = self._download_json(
'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo', 'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
json.dumps(params).encode('utf-8')) video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8'))
info = self._download_json(req, video_id, 'Downloading video info json')
title = info['MediaName'] title = info['MediaName']
@ -141,11 +124,16 @@ class ToggleIE(InfoExtractor):
vid_format = vid_format.replace(' ', '') vid_format = vid_format.replace(' ', '')
# if geo-restricted, m3u8 is inaccessible, but mp4 is okay # if geo-restricted, m3u8 is inaccessible, but mp4 is okay
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
video_url, video_id, ext='mp4', m3u8_id=vid_format, video_url, video_id, ext='mp4', m3u8_id=vid_format,
note='Downloading %s m3u8 information' % vid_format, note='Downloading %s m3u8 information' % vid_format,
errnote='Failed to download %s m3u8 information' % vid_format, errnote='Failed to download %s m3u8 information' % vid_format,
fatal=False)) fatal=False)
for f in m3u8_formats:
# Apple FairPlay Streaming
if '/fpshls/' in f['url']:
continue
formats.append(f)
elif ext == 'mpd': elif ext == 'mpd':
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id=vid_format, video_url, video_id, mpd_id=vid_format,
@ -158,28 +146,21 @@ class ToggleIE(InfoExtractor):
note='Downloading %s ISM manifest' % vid_format, note='Downloading %s ISM manifest' % vid_format,
errnote='Failed to download %s ISM manifest' % vid_format, errnote='Failed to download %s ISM manifest' % vid_format,
fatal=False)) fatal=False))
elif ext in ('mp4', 'wvm'): elif ext == 'mp4':
# wvm are drm-protected files
formats.append({ formats.append({
'ext': ext, 'ext': ext,
'url': video_url, 'url': video_url,
'format_id': vid_format, 'format_id': vid_format,
'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
'format_note': 'DRM-protected video' if ext == 'wvm' else None
}) })
if not formats: if not formats:
for meta in (info.get('Metas') or []):
if meta.get('Key') == 'Encryption' and meta.get('Value') == '1':
raise ExtractorError(
'This video is DRM protected.', expected=True)
# Most likely because geo-blocked # Most likely because geo-blocked
raise ExtractorError('No downloadable videos found', expected=True) raise ExtractorError('No downloadable videos found', expected=True)
self._sort_formats(formats) self._sort_formats(formats)
duration = int_or_none(info.get('Duration'))
description = info.get('Description')
created_at = parse_iso8601(info.get('CreationDate') or None)
average_rating = float_or_none(info.get('Rating'))
view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
thumbnails = [] thumbnails = []
for picture in info.get('Pictures', []): for picture in info.get('Pictures', []):
if not isinstance(picture, dict): if not isinstance(picture, dict):
@ -199,15 +180,46 @@ class ToggleIE(InfoExtractor):
}) })
thumbnails.append(thumbnail) thumbnails.append(thumbnail)
def counter(prefix):
return int_or_none(
info.get(prefix + 'Counter') or info.get(prefix.lower() + '_counter'))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': strip_or_none(info.get('Description')),
'duration': duration, 'duration': int_or_none(info.get('Duration')),
'timestamp': created_at, 'timestamp': parse_iso8601(info.get('CreationDate') or None),
'average_rating': average_rating, 'average_rating': float_or_none(info.get('Rating')),
'view_count': view_count, 'view_count': counter('View'),
'like_count': like_count, 'like_count': counter('Like'),
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'formats': formats, 'formats': formats,
} }
class MeWatchIE(InfoExtractor):
IE_NAME = 'mewatch'
_VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[0-9a-zA-Z-]+-(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
'info_dict': {
'id': '1008625',
'ext': 'mp4',
'title': 'Recipe Of Life 味之道',
'timestamp': 1603306526,
'description': 'md5:6e88cde8af2068444fc8e1bc3ebf257c',
'upload_date': '20201021',
},
'params': {
'skip_download': 'm3u8 download',
},
}]
def _real_extract(self, url):
item_id = self._match_id(url)
custom_id = self._download_json(
'https://cdn.mewatch.sg/api/items/' + item_id,
item_id, query={'segments': 'all'})['customId']
return self.url_result(
'toggle:' + custom_id, ToggleIE.ie_key(), custom_id)

@ -4,7 +4,9 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none,
smuggle_url, smuggle_url,
strip_or_none,
) )
@ -23,7 +25,8 @@ class TVAIE(InfoExtractor):
'params': { 'params': {
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
} },
'skip': 'HTTP Error 404: Not Found',
}, { }, {
'url': 'https://video.tva.ca/details/_5596811470001', 'url': 'https://video.tva.ca/details/_5596811470001',
'only_matching': True, 'only_matching': True,
@ -32,26 +35,54 @@ class TVAIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_data = self._download_json(
'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
'Accept': 'application/json',
}, query={
'appId': '5955fc5f23eec60006c951f1',
})
def get_attribute(key):
for attribute in video_data.get('attributes', []):
if attribute.get('key') == key:
return attribute.get('value')
return None
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id, 'id': video_id,
'title': get_attribute('title'),
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}), 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
'description': get_attribute('description'),
'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
'duration': float_or_none(get_attribute('video-duration'), 1000),
'ie_key': 'BrightcoveNew', 'ie_key': 'BrightcoveNew',
} }
class QubIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619',
'md5': '949490fd0e7aee11d0543777611fbd53',
'info_dict': {
'id': '6084352463001',
'ext': 'mp4',
'title': 'Épisode 01',
'uploader_id': '5481942443001',
'upload_date': '20190907',
'timestamp': 1567899756,
'description': 'md5:9c0d7fbb90939420c651fd977df90145',
},
}, {
'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943',
'only_matching': True,
}]
# reference_id also works with old account_id(5481942443001)
# BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s'
def _real_extract(self, url):
entity_id = self._match_id(url)
entity = self._download_json(
'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities',
entity_id, query={'id': entity_id})
video_id = entity['videoId']
episode = strip_or_none(entity.get('name'))
return {
'_type': 'url_transparent',
'id': video_id,
'title': episode,
# 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'],
'url': 'https://videos.tva.ca/details/_' + video_id,
'description': entity.get('longDescription'),
'duration': float_or_none(entity.get('durationMillis'), 1000),
'episode': episode,
'episode_number': int_or_none(entity.get('episodeNumber')),
# 'ie_key': 'BrightcoveNew',
'ie_key': TVAIE.ie_key(),
}

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
remove_start,
smuggle_url,
try_get,
)
class TVerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>(?:corner|episode|feature)/(?P<id>f?\d+))'
# videos are only available for 7 days
_TESTS = [{
'url': 'https://tver.jp/corner/f0062178',
'only_matching': True,
}, {
'url': 'https://tver.jp/feature/f0062413',
'only_matching': True,
}, {
'url': 'https://tver.jp/episode/79622438',
'only_matching': True,
}]
_TOKEN = None
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_initialize(self):
self._TOKEN = self._download_json(
'https://tver.jp/api/access_token.php', None)['token']
def _real_extract(self, url):
path, video_id = re.match(self._VALID_URL, url).groups()
main = self._download_json(
'https://api.tver.jp/v4/' + path, video_id,
query={'token': self._TOKEN})['main']
p_id = main['publisher_id']
service = remove_start(main['service'], 'ts_')
info = {
'_type': 'url_transparent',
'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
}
if service == 'cx':
info.update({
'title': main.get('subtitle') or main['title'],
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
'ie_key': 'FujiTVFODPlus7',
})
else:
r_id = main['reference_id']
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
r_id = 'ref:' + r_id
bc_url = smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
{'geo_countries': ['JP']})
info.update({
'url': bc_url,
'ie_key': 'BrightcoveNew',
})
return info

@ -1,10 +1,9 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import random import random
import re
import string import string
import struct
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -12,13 +11,14 @@ from ..utils import (
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
parse_codecs, parse_codecs,
update_url_query,
xpath_element, xpath_element,
xpath_text, xpath_text,
) )
from ..compat import ( from ..compat import (
compat_b64decode, compat_b64decode,
compat_ord, compat_ord,
compat_parse_qs, compat_struct_pack,
) )
@ -28,7 +28,7 @@ class VideaIE(InfoExtractor):
videa(?:kid)?\.hu/ videa(?:kid)?\.hu/
(?: (?:
videok/(?:[^/]+/)*[^?#&]+-| videok/(?:[^/]+/)*[^?#&]+-|
player\?.*?\bv=| (?:videojs_)?player\?.*?\bv=|
player/v/ player/v/
) )
(?P<id>[^?#&]+) (?P<id>[^?#&]+)
@ -62,6 +62,7 @@ class VideaIE(InfoExtractor):
'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
'only_matching': True, 'only_matching': True,
}] }]
_STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
@staticmethod @staticmethod
def _extract_urls(webpage): def _extract_urls(webpage):
@ -69,75 +70,84 @@ class VideaIE(InfoExtractor):
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
webpage)] webpage)]
def rc4(self, ciphertext, key): @staticmethod
def rc4(cipher_text, key):
res = b'' res = b''
keyLen = len(key) key_len = len(key)
S = list(range(256)) S = list(range(256))
j = 0 j = 0
for i in range(256): for i in range(256):
j = (j + S[i] + ord(key[i % keyLen])) % 256 j = (j + S[i] + ord(key[i % key_len])) % 256
S[i], S[j] = S[j], S[i] S[i], S[j] = S[j], S[i]
i = 0 i = 0
j = 0 j = 0
for m in range(len(ciphertext)): for m in range(len(cipher_text)):
i = (i + 1) % 256 i = (i + 1) % 256
j = (j + S[i]) % 256 j = (j + S[i]) % 256
S[i], S[j] = S[j], S[i] S[i], S[j] = S[j], S[i]
k = S[(S[i] + S[j]) % 256] k = S[(S[i] + S[j]) % 256]
res += struct.pack("B", k ^ compat_ord(ciphertext[m])) res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
return res return res.decode()
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, fatal=True) query = {'v': video_id}
error = self._search_regex(r'<p class="error-text">([^<]+)</p>', webpage, 'error', default=None) player_page = self._download_webpage(
if error: 'https://videa.hu/player', video_id, query=query)
raise ExtractorError(error, expected=True)
nonce = self._search_regex(
video_src_params_raw = self._search_regex(r'<iframe[^>]+id="videa_player_iframe"[^>]+src="/player\?([^"]+)"', webpage, 'video_src_params') r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
video_src_params = compat_parse_qs(video_src_params_raw)
player_page = self._download_webpage("https://videa.hu/videojs_player?%s" % video_src_params_raw, video_id, fatal=True)
nonce = self._search_regex(r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
random_seed = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(8))
static_secret = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
l = nonce[:32] l = nonce[:32]
s = nonce[32:] s = nonce[32:]
result = '' result = ''
for i in range(0, 32): for i in range(0, 32):
result += s[i - (static_secret.index(l[i]) - 31)] result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
video_src_params['_s'] = random_seed random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
video_src_params['_t'] = result[:16] query['_s'] = random_seed
encryption_key_stem = result[16:] + random_seed query['_t'] = result[:16]
[b64_info, handle] = self._download_webpage_handle( b64_info, handle = self._download_webpage_handle(
'http://videa.hu/videaplayer_get_xml.php', video_id, 'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
query=video_src_params, fatal=True) if b64_info.startswith('<?xml'):
info = self._parse_xml(b64_info, video_id)
encrypted_info = compat_b64decode(b64_info) else:
key = encryption_key_stem + handle.info()['x-videa-xs'] key = result[16:] + random_seed + handle.headers['x-videa-xs']
info_str = self.rc4(encrypted_info, key).decode('utf8') info = self._parse_xml(self.rc4(
info = self._parse_xml(info_str, video_id) compat_b64decode(b64_info), key), video_id)
video = xpath_element(info, './/video', 'video', fatal=True) video = xpath_element(info, './video', 'video')
sources = xpath_element(info, './/video_sources', 'sources', fatal=True) if not video:
hash_values = xpath_element(info, './/hash_values', 'hash_values', fatal=True) raise ExtractorError(xpath_element(
info, './error', fatal=True), expected=True)
sources = xpath_element(
info, './video_sources', 'sources', fatal=True)
hash_values = xpath_element(
info, './hash_values', 'hash values', fatal=True)
title = xpath_text(video, './title', fatal=True) title = xpath_text(video, './title', fatal=True)
formats = [] formats = []
for source in sources.findall('./video_source'): for source in sources.findall('./video_source'):
source_url = source.text source_url = source.text
if not source_url: source_name = source.get('name')
source_exp = source.get('exp')
if not (source_url and source_name and source_exp):
continue continue
source_url += '?md5=%s&expires=%s' % (hash_values.find('hash_value_%s' % source.get('name')).text, source.get('exp')) hash_value = xpath_text(hash_values, 'hash_value_' + source_name)
if not hash_value:
continue
source_url = update_url_query(source_url, {
'md5': hash_value,
'expires': source_exp,
})
f = parse_codecs(source.get('codecs')) f = parse_codecs(source.get('codecs'))
f.update({ f.update({
'url': source_url, 'url': self._proto_relative_url(source_url),
'ext': mimetype2ext(source.get('mimetype')) or 'mp4', 'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
'format_id': source.get('name'), 'format_id': source.get('name'),
'width': int_or_none(source.get('width')), 'width': int_or_none(source.get('width')),
@ -146,8 +156,7 @@ class VideaIE(InfoExtractor):
formats.append(f) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = xpath_text(video, './poster_src') thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
duration = int_or_none(xpath_text(video, './duration'))
age_limit = None age_limit = None
is_adult = xpath_text(video, './is_adult_content', default=None) is_adult = xpath_text(video, './is_adult_content', default=None)
@ -158,7 +167,7 @@ class VideaIE(InfoExtractor):
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': int_or_none(xpath_text(video, './duration')),
'age_limit': age_limit, 'age_limit': age_limit,
'formats': formats, 'formats': formats,
} }

@ -46,57 +46,69 @@ class YandexMusicBaseIE(InfoExtractor):
self._handle_error(response) self._handle_error(response)
return response return response
def _call_api(self, ep, tld, url, item_id, note, query):
    """Request the ``handlers/<ep>.jsx`` endpoint on music.yandex.<tld>.

    ep: handler name without the ``.jsx`` suffix.
    tld: top-level domain of the Yandex Music host to query.
    url: page URL, sent as both ``Referer`` and ``X-Retpath-Y``.
    item_id, note: forwarded to ``_download_json`` as its id and note.
    query: mapping sent as the request's query string.

    Returns whatever ``self._download_json`` returns. The request is made
    with ``fatal=False``, so callers should not assume a well-formed
    JSON object comes back (NOTE(review): exact failure value depends on
    ``_download_json`` - confirm).
    """
    endpoint = 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep)
    request_headers = {
        'Referer': url,
        'X-Requested-With': 'XMLHttpRequest',
        'X-Retpath-Y': url,
    }
    return self._download_json(
        endpoint, item_id, note,
        fatal=False, headers=request_headers, query=query)
class YandexMusicTrackIE(YandexMusicBaseIE): class YandexMusicTrackIE(YandexMusicBaseIE):
IE_NAME = 'yandexmusic:track' IE_NAME = 'yandexmusic:track'
IE_DESC = 'Яндекс.Музыка - Трек' IE_DESC = 'Яндекс.Музыка - Трек'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)' _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://music.yandex.ru/album/540508/track/4878838', 'url': 'http://music.yandex.ru/album/540508/track/4878838',
'md5': 'f496818aa2f60b6c0062980d2e00dc20', 'md5': 'dec8b661f12027ceaba33318787fff76',
'info_dict': { 'info_dict': {
'id': '4878838', 'id': '4878838',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', 'title': 'md5:c63e19341fdbe84e43425a30bc777856',
'filesize': 4628061, 'filesize': int,
'duration': 193.04, 'duration': 193.04,
'track': 'Gypsy Eyes 1', 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff',
'album': 'Gypsy Soul', 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a',
'album_artist': 'Carlo Ambrosio', 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200',
'artist': 'Carlo Ambrosio & Fabio Di Bari', 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160',
'release_year': 2009, 'release_year': 2009,
}, },
'skip': 'Travis CI servers blocked by YandexMusic', # 'skip': 'Travis CI servers blocked by YandexMusic',
}, { }, {
# multiple disks # multiple disks
'url': 'http://music.yandex.ru/album/3840501/track/705105', 'url': 'http://music.yandex.ru/album/3840501/track/705105',
'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e', 'md5': '82a54e9e787301dd45aba093cf6e58c0',
'info_dict': { 'info_dict': {
'id': '705105', 'id': '705105',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Hooverphonic - Sometimes', 'title': 'md5:f86d4a9188279860a83000277024c1a6',
'filesize': 5743386, 'filesize': int,
'duration': 239.27, 'duration': 239.27,
'track': 'Sometimes', 'track': 'md5:40f887f0666ba1aa10b835aca44807d1',
'album': 'The Best of Hooverphonic', 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873',
'album_artist': 'Hooverphonic', 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
'artist': 'Hooverphonic', 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12',
'release_year': 2016, 'release_year': 2016,
'genre': 'pop', 'genre': 'pop',
'disc_number': 2, 'disc_number': 2,
'track_number': 9, 'track_number': 9,
}, },
'skip': 'Travis CI servers blocked by YandexMusic', # 'skip': 'Travis CI servers blocked by YandexMusic',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
album_id, track_id = mobj.group('album_id'), mobj.group('id') tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id')
track = self._download_json( track = self._call_api(
'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id), 'track', tld, url, track_id, 'Downloading track JSON',
track_id, 'Downloading track JSON')['track'] {'track': '%s:%s' % (track_id, album_id)})['track']
track_title = track['title'] track_title = track['title']
download_data = self._download_json( download_data = self._download_json(
@ -109,8 +121,7 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
'Downloading track location JSON', 'Downloading track location JSON',
query={'format': 'json'}) query={'format': 'json'})
key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest() key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
storage = track['storageDir'].split('.') f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id'])
f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1])
thumbnail = None thumbnail = None
cover_uri = track.get('albums', [{}])[0].get('coverUri') cover_uri = track.get('albums', [{}])[0].get('coverUri')
@ -180,42 +191,85 @@ class YandexMusicTrackIE(YandexMusicBaseIE):
class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
    # Maximum number of track ids sent in one track-entries request.
    # The ids travel in the ``entries`` query value, so an unbounded list
    # makes the request grow with the playlist size.
    # NOTE(review): 250 is a conservative choice, not a documented limit.
    _TRACKS_PER_CHUNK = 250

    def _extract_tracks(self, source, item_id, url, tld):
        """Return the track list of ``source``, fetching tracks it omits.

        source: mapping with a ``tracks`` list and a ``trackIds`` list.
        item_id, url, tld: forwarded to ``_call_api`` for the follow-up
            requests.

        The ``tracks`` list of ``source`` is extended in place and returned.
        """
        tracks = source['tracks']
        track_ids = [compat_str(track_id) for track_id in source['trackIds']]

        # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
        # missing tracks should be retrieved manually.
        if len(tracks) < len(track_ids):
            present_track_ids = set(
                compat_str(track['id'])
                for track in tracks if track.get('id'))
            missing_track_ids = [
                track_id for track_id in track_ids
                if track_id not in present_track_ids]
            chunk_size = self._TRACKS_PER_CHUNK
            # Request the missing tracks in bounded chunks; nothing is
            # requested at all when no id is actually missing.
            for start in range(0, len(missing_track_ids), chunk_size):
                chunk_ids = missing_track_ids[start:start + chunk_size]
                missing_tracks = self._call_api(
                    'track-entries', tld, url, item_id,
                    'Downloading missing tracks JSON chunk %d'
                    % (start // chunk_size + 1), {
                        'entries': ','.join(chunk_ids),
                        'lang': tld,
                        'external-domain': 'music.yandex.%s' % tld,
                        'overembed': 'false',
                        'strict': 'true',
                    })
                # A failed chunk yields a falsy result; skip it and go on.
                if missing_tracks:
                    tracks.extend(missing_tracks)

        return tracks

    def _build_playlist(self, tracks):
        """Turn track mappings into ``url_result`` entries for the track IE.

        Tracks without an id, or without a usable first album carrying an
        id, are skipped, since both ids are needed to build the track URL.
        """
        entries = []
        for track in tracks:
            # Same defensive check as for the album entry below.
            if not isinstance(track, dict):
                continue
            track_id = track.get('id') or track.get('realId')
            if not track_id:
                continue
            albums = track.get('albums')
            if not albums or not isinstance(albums, list):
                continue
            album = albums[0]
            if not isinstance(album, dict):
                continue
            album_id = album.get('id')
            if not album_id:
                continue
            entries.append(self.url_result(
                'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id),
                ie=YandexMusicTrackIE.ie_key(), video_id=track_id))
        return entries
class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
IE_NAME = 'yandexmusic:album' IE_NAME = 'yandexmusic:album'
IE_DESC = 'Яндекс.Музыка - Альбом' IE_DESC = 'Яндекс.Музыка - Альбом'
_VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)' _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://music.yandex.ru/album/540508', 'url': 'http://music.yandex.ru/album/540508',
'info_dict': { 'info_dict': {
'id': '540508', 'id': '540508',
'title': 'Carlo Ambrosio - Gypsy Soul (2009)', 'title': 'md5:7ed1c3567f28d14be9f61179116f5571',
}, },
'playlist_count': 50, 'playlist_count': 50,
'skip': 'Travis CI servers blocked by YandexMusic', # 'skip': 'Travis CI servers blocked by YandexMusic',
}, { }, {
'url': 'https://music.yandex.ru/album/3840501', 'url': 'https://music.yandex.ru/album/3840501',
'info_dict': { 'info_dict': {
'id': '3840501', 'id': '3840501',
'title': 'Hooverphonic - The Best of Hooverphonic (2016)', 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f',
}, },
'playlist_count': 33, 'playlist_count': 33,
'skip': 'Travis CI servers blocked by YandexMusic', # 'skip': 'Travis CI servers blocked by YandexMusic',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
album_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
tld = mobj.group('tld')
album_id = mobj.group('id')
album = self._download_json( album = self._call_api(
'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id, 'album', tld, url, album_id, 'Downloading album JSON',
album_id, 'Downloading album JSON') {'album': album_id})
entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
@ -236,21 +290,24 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'url': 'http://music.yandex.ru/users/music.partners/playlists/1245', 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
'info_dict': { 'info_dict': {
'id': '1245', 'id': '1245',
'title': 'Что слушают Enter Shikari', 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097',
'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9', 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
}, },
'playlist_count': 6, 'playlist_count': 5,
'skip': 'Travis CI servers blocked by YandexMusic', # 'skip': 'Travis CI servers blocked by YandexMusic',
}, { }, {
# playlist exceeding the limit of 150 tracks shipped with webpage (see
# https://github.com/ytdl-org/youtube-dl/issues/6666)
'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036', 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
'only_matching': True,
}, {
# playlist exceeding the limit of 150 tracks (see
# https://github.com/ytdl-org/youtube-dl/issues/6666)
'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364',
'info_dict': { 'info_dict': {
'id': '1036', 'id': '1364',
'title': 'Музыка 90-х', 'title': 'md5:b3b400f997d3f878a13ae0699653f7db',
}, },
'playlist_mincount': 300, 'playlist_mincount': 437,
'skip': 'Travis CI servers blocked by YandexMusic', # 'skip': 'Travis CI servers blocked by YandexMusic',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -259,16 +316,8 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
user = mobj.group('user') user = mobj.group('user')
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
playlist = self._download_json( playlist = self._call_api(
'https://music.yandex.%s/handlers/playlist.jsx' % tld, 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', {
playlist_id, 'Downloading missing tracks JSON',
fatal=False,
headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
'X-Retpath-Y': url,
},
query={
'owner': user, 'owner': user,
'kinds': playlist_id, 'kinds': playlist_id,
'light': 'true', 'light': 'true',
@ -277,37 +326,103 @@ class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
'overembed': 'false', 'overembed': 'false',
})['playlist'] })['playlist']
tracks = playlist['tracks'] tracks = self._extract_tracks(playlist, playlist_id, url, tld)
track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
# tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
# missing tracks should be retrieved manually.
if len(tracks) < len(track_ids):
present_track_ids = set([
compat_str(track['id'])
for track in tracks if track.get('id')])
missing_track_ids = [
track_id for track_id in track_ids
if track_id not in present_track_ids]
missing_tracks = self._download_json(
'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
playlist_id, 'Downloading missing tracks JSON',
fatal=False,
headers={
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
},
query={
'entries': ','.join(missing_track_ids),
'lang': tld,
'external-domain': 'music.yandex.%s' % tld,
'overembed': 'false',
'strict': 'true',
})
if missing_tracks:
tracks.extend(missing_tracks)
return self.playlist_result( return self.playlist_result(
self._build_playlist(tracks), self._build_playlist(tracks),
compat_str(playlist_id), compat_str(playlist_id),
playlist.get('title'), playlist.get('description')) playlist.get('title'), playlist.get('description'))
class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE):
    """Common code for the artist extractors.

    Reads ``_ARTIST_WHAT`` and ``_ARTIST_SORT`` from ``self`` (this class
    does not define them) and expects ``_VALID_URL`` to expose ``tld`` and
    ``id`` groups.
    """

    def _call_artist(self, tld, url, artist_id):
        """Query the ``artist`` handler for the configured section."""
        section = self._ARTIST_WHAT
        params = {
            'artist': artist_id,
            'what': section,
            'sort': self._ARTIST_SORT or '',
            'dir': '',
            'period': '',
            'lang': tld,
            'external-domain': 'music.yandex.%s' % tld,
            'overembed': 'false',
        }
        return self._call_api(
            'artist', tld, url, artist_id,
            'Downloading artist %s JSON' % section, params)

    def _real_extract(self, url):
        """Build a playlist of the artist's tracks, titled with the artist name."""
        match = re.match(self._VALID_URL, url)
        tld, artist_id = match.group('tld'), match.group('id')

        data = self._call_artist(tld, url, artist_id)
        entries = self._build_playlist(
            self._extract_tracks(data, artist_id, url, tld))
        # The title is None when the artist name is absent or not a string.
        artist_name = try_get(data, lambda x: x['artist']['name'], compat_str)
        return self.playlist_result(entries, artist_id, artist_name)
class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's ``/tracks`` page, returned as a playlist."""

    IE_NAME = 'yandexmusic:artist:tracks'
    IE_DESC = 'Яндекс.Музыка - Артист - Треки'
    _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/tracks'

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/tracks',
        'info_dict': {
            'id': '617526',
            'title': 'md5:131aef29d45fd5a965ca613e708c040b',
        },
        'playlist_count': 507,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    # Values sent as the ``sort`` and ``what`` query parameters.
    _ARTIST_SORT = ''
    _ARTIST_WHAT = 'tracks'

    def _real_extract(self, url):
        """Return the artist's tracks as a playlist titled '<artist> - Треки'."""
        match = re.match(self._VALID_URL, url)
        tld, artist_id = match.group('tld'), match.group('id')

        data = self._call_artist(tld, url, artist_id)
        entries = self._build_playlist(
            self._extract_tracks(data, artist_id, url, tld))

        # Fall back to the numeric id when no artist name is available.
        artist_name = try_get(data, lambda x: x['artist']['name'], compat_str)
        playlist_title = '%s - %s' % (artist_name or artist_id, 'Треки')
        return self.playlist_result(entries, artist_id, playlist_title)
class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE):
    """Extractor for an artist's ``/albums`` page.

    Returns a playlist with one ``url_result`` entry per album, each
    delegated to ``YandexMusicAlbumIE``.
    """

    IE_NAME = 'yandexmusic:artist:albums'
    IE_DESC = 'Яндекс.Музыка - Артист - Альбомы'
    _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/albums'

    _TESTS = [{
        'url': 'https://music.yandex.ru/artist/617526/albums',
        'info_dict': {
            'id': '617526',
            'title': 'md5:55dc58d5c85699b7fb41ee926700236c',
        },
        'playlist_count': 8,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }]

    # Values sent as the ``sort`` and ``what`` query parameters.
    _ARTIST_SORT = 'year'
    _ARTIST_WHAT = 'albums'

    def _real_extract(self, url):
        """Return the artist's albums as a playlist titled '<artist> - Альбомы'."""
        mobj = re.match(self._VALID_URL, url)
        tld = mobj.group('tld')
        artist_id = mobj.group('id')

        data = self._call_artist(tld, url, artist_id)

        # The artist request is made with fatal=False, so ``data`` may not be
        # the expected mapping. Read the album list as tolerantly as the
        # artist name below instead of indexing ``data`` directly.
        albums = try_get(data, lambda x: x['albums'], list) or []

        entries = []
        for album in albums:
            if not isinstance(album, dict):
                continue
            album_id = album.get('id')
            if not album_id:
                continue
            entries.append(self.url_result(
                'http://music.yandex.ru/album/%s' % album_id,
                ie=YandexMusicAlbumIE.ie_key(), video_id=album_id))

        # Fall back to the numeric id when no artist name is available.
        artist = try_get(data, lambda x: x['artist']['name'], compat_str)
        title = '%s - %s' % (artist or artist_id, 'Альбомы')
        return self.playlist_result(entries, artist_id, title)

@ -41,7 +41,7 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE): class ZDFIE(ZDFBaseIE):
IE_NAME = "ZDF-3sat" IE_NAME = "ZDF-3sat"
_VALID_URL = r'https?://www\.(zdf|3sat)\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html' _VALID_URL = r'https?://www\.(zdf|3sat)\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh') _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
_GEO_COUNTRIES = ['DE'] _GEO_COUNTRIES = ['DE']
_TESTS = [{ _TESTS = [{
@ -131,7 +131,7 @@ class ZDFIE(ZDFBaseIE):
if not ptmd_path: if not ptmd_path:
ptmd_path = t[ ptmd_path = t[
'http://zdf.de/rels/streams/ptmd-template'].replace( 'http://zdf.de/rels/streams/ptmd-template'].replace(
'{playerId}', 'portal') '{playerId}', 'ngplayer_2_4')
ptmd = self._call_api( ptmd = self._call_api(
urljoin(url, ptmd_path), player, url, video_id, 'metadata') urljoin(url, ptmd_path), player, url, video_id, 'metadata')

Loading…
Cancel
Save