diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index db2295572..ad11521f7 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -477,6 +477,7 @@
- **massengeschmack.tv**
- **MatchTV**
- **MDR**: MDR.DE and KiKA
+ - **MedalTV**
- **media.ccc.de**
- **media.ccc.de:lists**
- **Medialaan**
@@ -846,6 +847,10 @@
- **Sport5**
- **SportBox**
- **SportDeutschland**
+ - **Spreaker**
+ - **SpreakerPage**
+ - **SpreakerShow**
+ - **SpreakerShowPage**
- **SpringboardPlatform**
- **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk
@@ -1064,7 +1069,7 @@
- **vk:wallpost**
- **vlive**
- **vlive:channel**
- - **vlive:playlist**
+ - **vlive:post**
- **Vodlocker**
- **VODPl**
- **VODPlatform**
diff --git a/youtube_dlc/downloader/fragment.py b/youtube_dlc/downloader/fragment.py
index 9339b3a62..cf4fd41da 100644
--- a/youtube_dlc/downloader/fragment.py
+++ b/youtube_dlc/downloader/fragment.py
@@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
- success = ctx['dl'].download(fragment_filename, {
+ fragment_info_dict = {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
- })
+ }
+ success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False, None
+ if fragment_info_dict.get('filetime'):
+ ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
@@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
else:
self.try_rename(ctx['tmpfilename'], ctx['filename'])
+ if self.params.get('updatetime', True):
+ filetime = ctx.get('fragment_filetime')
+ if filetime:
+ try:
+ os.utime(ctx['filename'], (time.time(), filetime))
+ except Exception:
+ pass
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
self._hook_progress({
diff --git a/youtube_dlc/extractor/bbc.py b/youtube_dlc/extractor/bbc.py
index 002c39c39..54cbcdc8e 100644
--- a/youtube_dlc/extractor/bbc.py
+++ b/youtube_dlc/extractor/bbc.py
@@ -981,7 +981,7 @@ class BBCIE(BBCCoUkIE):
group_id = self._search_regex(
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
webpage, 'group id', default=None)
- if playlist_id:
+ if group_id:
return self.url_result(
'https://www.bbc.co.uk/programmes/%s' % group_id,
ie=BBCCoUkIE.ie_key())
@@ -1092,10 +1092,26 @@ class BBCIE(BBCCoUkIE):
self._search_regex(
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
'bbcthree config', default='{}'),
- playlist_id, transform_source=js_to_json, fatal=False)
- if bbc3_config:
+ playlist_id, transform_source=js_to_json, fatal=False) or {}
+ payload = bbc3_config.get('payload') or {}
+ if payload:
+ clip = payload.get('currentClip') or {}
+ clip_vpid = clip.get('vpid')
+ clip_title = clip.get('title')
+ if clip_vpid and clip_title:
+ formats, subtitles = self._download_media_selector(clip_vpid)
+ self._sort_formats(formats)
+ return {
+ 'id': clip_vpid,
+ 'title': clip_title,
+ 'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
+ 'description': clip.get('description'),
+ 'duration': parse_duration(clip.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
bbc3_playlist = try_get(
- bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
+ payload, lambda x: x['content']['bbcMedia']['playlist'],
dict)
if bbc3_playlist:
playlist_title = bbc3_playlist.get('title') or playlist_title
@@ -1118,6 +1134,39 @@ class BBCIE(BBCCoUkIE):
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
+ initial_data = self._parse_json(self._search_regex(
+ r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
+ 'preload state', default='{}'), playlist_id, fatal=False)
+ if initial_data:
+ def parse_media(media):
+ if not media:
+ return
+ for item in (try_get(media, lambda x: x['media']['items'], list) or []):
+ item_id = item.get('id')
+ item_title = item.get('title')
+ if not (item_id and item_title):
+ continue
+ formats, subtitles = self._download_media_selector(item_id)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': item_id,
+ 'title': item_title,
+ 'thumbnail': item.get('holdingImageUrl'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ for resp in (initial_data.get('data') or {}).values():
+ name = resp.get('name')
+ if name == 'media-experience':
+ parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
+ elif name == 'article':
+ for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
+ if block.get('type') != 'media':
+ continue
+ parse_media(block.get('model'))
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
def extract_all(pattern):
return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False),
diff --git a/youtube_dlc/extractor/cda.py b/youtube_dlc/extractor/cda.py
index 0c3af23d5..d67900e62 100644
--- a/youtube_dlc/extractor/cda.py
+++ b/youtube_dlc/extractor/cda.py
@@ -5,10 +5,16 @@ import codecs
import re
from .common import InfoExtractor
+from ..compat import (
+ compat_chr,
+ compat_ord,
+ compat_urllib_parse_unquote,
+)
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ merge_dicts,
multipart_encode,
parse_duration,
random_birthday,
@@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
'view_count', default=None)
average_rating = self._search_regex(
- r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
- webpage, 'rating', fatal=False, group='rating_value')
+ (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
+ r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
+ group='rating_value')
info_dict = {
'id': video_id,
@@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
'age_limit': 18 if need_confirm_age else 0,
}
+ # Source: https://www.cda.pl/js/player.js?t=1606154898
+ def decrypt_file(a):
+ for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
+ a = a.replace(p, '')
+ a = compat_urllib_parse_unquote(a)
+ b = []
+ for c in a:
+ f = compat_ord(c)
+ b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
+ a = ''.join(b)
+ a = a.replace('.cda.mp4', '')
+ for p in ('.2cda.pl', '.3cda.pl'):
+ a = a.replace(p, '.cda.pl')
+ if '/upstream' in a:
+ a = a.replace('/upstream', '.mp4/upstream')
+ return 'https://' + a
+ return 'https://' + a + '.mp4'
+
def extract_format(page, version):
json_str = self._html_search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
@@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4')
+ elif not video['file'].startswith('http'):
+ video['file'] = decrypt_file(video['file'])
f = {
'url': video['file'],
}
@@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):
self._sort_formats(formats)
- return info_dict
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ return merge_dicts(info_dict, info)
diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py
index c50bdbb79..9fe458038 100644
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@@ -620,6 +620,7 @@ from .markiza import (
from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE
from .mdr import MDRIE
+from .medaltv import MedalTVIE
from .mediaset import MediasetIE
from .mediasite import (
MediasiteIE,
@@ -1102,6 +1103,12 @@ from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
+from .spreaker import (
+ SpreakerIE,
+ SpreakerPageIE,
+ SpreakerShowIE,
+ SpreakerShowPageIE,
+)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .srgssr import (
@@ -1395,8 +1402,8 @@ from .vk import (
)
from .vlive import (
VLiveIE,
+ VLivePostIE,
VLiveChannelIE,
- VLivePlaylistIE
)
from .vodlocker import VodlockerIE
from .vodpl import VODPlIE
diff --git a/youtube_dlc/extractor/medaltv.py b/youtube_dlc/extractor/medaltv.py
new file mode 100644
index 000000000..1603b55f6
--- /dev/null
+++ b/youtube_dlc/extractor/medaltv.py
@@ -0,0 +1,131 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class MedalTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
+ 'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
+ 'info_dict': {
+ 'id': '34934644',
+ 'ext': 'mp4',
+ 'title': 'Quad Cold',
+ 'description': 'Medal,https://medal.tv/desktop/',
+ 'uploader': 'MowgliSB',
+ 'timestamp': 1603165266,
+ 'upload_date': '20201020',
+ 'uploader_id': 10619174,
+ }
+ }, {
+ 'url': 'https://medal.tv/clips/36787208',
+ 'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
+ 'info_dict': {
+ 'id': '36787208',
+ 'ext': 'mp4',
+ 'title': 'u tk me i tk u bigger',
+ 'description': 'Medal,https://medal.tv/desktop/',
+ 'uploader': 'Mimicc',
+ 'timestamp': 1605580939,
+ 'upload_date': '20201117',
+ 'uploader_id': 5156321,
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ hydration_data = self._parse_json(self._search_regex(
+ r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
+ webpage, 'hydration data', default='{}'), video_id)
+
+ clip = try_get(
+ hydration_data, lambda x: x['clips'][video_id], dict) or {}
+ if not clip:
+ raise ExtractorError(
+ 'Could not find video information.', video_id=video_id)
+
+ title = clip['contentTitle']
+
+ source_width = int_or_none(clip.get('sourceWidth'))
+ source_height = int_or_none(clip.get('sourceHeight'))
+
+ aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
+
+ def add_item(container, item_url, height, id_key='format_id', item_id=None):
+ item_id = item_id or '%dp' % height
+ if item_id not in item_url:
+ return
+ width = int(round(aspect_ratio * height))
+ container.append({
+ 'url': item_url,
+ id_key: item_id,
+ 'width': width,
+ 'height': height
+ })
+
+ formats = []
+ thumbnails = []
+ for k, v in clip.items():
+ if not (v and isinstance(v, compat_str)):
+ continue
+ mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
+ if not mobj:
+ continue
+ prefix = mobj.group(1)
+ height = int_or_none(mobj.group(2))
+ if prefix == 'contentUrl':
+ add_item(
+ formats, v, height or source_height,
+ item_id=None if height else 'source')
+ elif prefix == 'thumbnail':
+ add_item(thumbnails, v, height, 'id')
+
+ error = clip.get('error')
+ if not formats and error:
+ if error == 404:
+ raise ExtractorError(
+ 'That clip does not exist.',
+ expected=True, video_id=video_id)
+ else:
+ raise ExtractorError(
+ 'An unknown error occurred ({0}).'.format(error),
+ video_id=video_id)
+
+ self._sort_formats(formats)
+
+ # Necessary because the id of the author is not known in advance.
+ # Won't raise an issue if no profile can be found as this is optional.
+ author = try_get(
+ hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
+ author_id = str_or_none(author.get('id'))
+ author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': clip.get('contentDescription'),
+ 'uploader': author.get('displayName'),
+ 'timestamp': float_or_none(clip.get('created'), 1000),
+ 'uploader_id': author_id,
+ 'uploader_url': author_url,
+ 'duration': int_or_none(clip.get('videoLengthSeconds')),
+ 'view_count': int_or_none(clip.get('views')),
+ 'like_count': int_or_none(clip.get('likes')),
+ 'comment_count': int_or_none(clip.get('comments')),
+ }
diff --git a/youtube_dlc/extractor/nrk.py b/youtube_dlc/extractor/nrk.py
index 84aacbcda..4a395546f 100644
--- a/youtube_dlc/extractor/nrk.py
+++ b/youtube_dlc/extractor/nrk.py
@@ -9,6 +9,7 @@ from ..compat import (
compat_urllib_parse_unquote,
)
from ..utils import (
+ determine_ext,
ExtractorError,
int_or_none,
js_to_json,
@@ -16,185 +17,13 @@ from ..utils import (
parse_age_limit,
parse_duration,
try_get,
+ url_or_none,
)
class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO']
- _api_host = None
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
-
- for api_host in api_hosts:
- data = self._download_json(
- 'http://%s/mediaelement/%s' % (api_host, video_id),
- video_id, 'Downloading mediaelement JSON',
- fatal=api_host == api_hosts[-1])
- if not data:
- continue
- self._api_host = api_host
- break
-
- title = data.get('fullTitle') or data.get('mainTitle') or data['title']
- video_id = data.get('id') or video_id
-
- entries = []
-
- conviva = data.get('convivaStatistics') or {}
- live = (data.get('mediaElementType') == 'Live'
- or data.get('isLive') is True or conviva.get('isLive'))
-
- def make_title(t):
- return self._live_title(t) if live else t
-
- media_assets = data.get('mediaAssets')
- if media_assets and isinstance(media_assets, list):
- def video_id_and_title(idx):
- return ((video_id, title) if len(media_assets) == 1
- else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
- for num, asset in enumerate(media_assets, 1):
- asset_url = asset.get('url')
- if not asset_url:
- continue
- formats = self._extract_akamai_formats(asset_url, video_id)
- if not formats:
- continue
- self._sort_formats(formats)
-
- # Some f4m streams may not work with hdcore in fragments' URLs
- for f in formats:
- extra_param = f.get('extra_param_to_segment_url')
- if extra_param and 'hdcore' in extra_param:
- del f['extra_param_to_segment_url']
-
- entry_id, entry_title = video_id_and_title(num)
- duration = parse_duration(asset.get('duration'))
- subtitles = {}
- for subtitle in ('webVtt', 'timedText'):
- subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
- if subtitle_url:
- subtitles.setdefault('no', []).append({
- 'url': compat_urllib_parse_unquote(subtitle_url)
- })
- entries.append({
- 'id': asset.get('carrierId') or entry_id,
- 'title': make_title(entry_title),
- 'duration': duration,
- 'subtitles': subtitles,
- 'formats': formats,
- })
-
- if not entries:
- media_url = data.get('mediaUrl')
- if media_url:
- formats = self._extract_akamai_formats(media_url, video_id)
- self._sort_formats(formats)
- duration = parse_duration(data.get('duration'))
- entries = [{
- 'id': video_id,
- 'title': make_title(title),
- 'duration': duration,
- 'formats': formats,
- }]
-
- if not entries:
- MESSAGES = {
- 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
- 'ProgramRightsHasExpired': 'Programmet har gått ut',
- 'NoProgramRights': 'Ikke tilgjengelig',
- 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
- }
- message_type = data.get('messageType', '')
- # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
- if 'IsGeoBlocked' in message_type:
- self.raise_geo_restricted(
- msg=MESSAGES.get('ProgramIsGeoBlocked'),
- countries=self._GEO_COUNTRIES)
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, MESSAGES.get(
- message_type, message_type)),
- expected=True)
-
- series = conviva.get('seriesName') or data.get('seriesTitle')
- episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
-
- season_number = None
- episode_number = None
- if data.get('mediaElementType') == 'Episode':
- _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
- data.get('relativeOriginUrl', '')
- EPISODENUM_RE = [
- r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
- r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
- ]
- season_number = int_or_none(self._search_regex(
- EPISODENUM_RE, _season_episode, 'season number',
- default=None, group='season'))
- episode_number = int_or_none(self._search_regex(
- EPISODENUM_RE, _season_episode, 'episode number',
- default=None, group='episode'))
-
- thumbnails = None
- images = data.get('images')
- if images and isinstance(images, dict):
- web_images = images.get('webImages')
- if isinstance(web_images, list):
- thumbnails = [{
- 'url': image['imageUrl'],
- 'width': int_or_none(image.get('width')),
- 'height': int_or_none(image.get('height')),
- } for image in web_images if image.get('imageUrl')]
-
- description = data.get('description')
- category = data.get('mediaAnalytics', {}).get('category')
-
- common_info = {
- 'description': description,
- 'series': series,
- 'episode': episode,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'categories': [category] if category else None,
- 'age_limit': parse_age_limit(data.get('legalAge')),
- 'thumbnails': thumbnails,
- }
-
- vcodec = 'none' if data.get('mediaType') == 'Audio' else None
-
- for entry in entries:
- entry.update(common_info)
- for f in entry['formats']:
- f['vcodec'] = vcodec
-
- points = data.get('shortIndexPoints')
- if isinstance(points, list):
- chapters = []
- for next_num, point in enumerate(points, start=1):
- if not isinstance(point, dict):
- continue
- start_time = parse_duration(point.get('startPoint'))
- if start_time is None:
- continue
- end_time = parse_duration(
- data.get('duration')
- if next_num == len(points)
- else points[next_num].get('startPoint'))
- if end_time is None:
- continue
- chapters.append({
- 'start_time': start_time,
- 'end_time': end_time,
- 'title': point.get('title'),
- })
- if chapters and len(entries) == 1:
- entries[0]['chapters'] = chapters
-
- return self.playlist_result(entries, video_id, title, description)
-
class NRKIE(NRKBaseIE):
_VALID_URL = r'''(?x)
@@ -202,13 +31,13 @@ class NRKIE(NRKBaseIE):
nrk:|
https?://
(?:
- (?:www\.)?nrk\.no/video/PS\*|
+ (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
v8[-.]psapi\.nrk\.no/mediaelement/
)
)
- (?P<id>[^?#&]+)
+ (?P<id>[^?\#&]+)
'''
- _API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
+
_TESTS = [{
# video
'url': 'http://www.nrk.no/video/PS*150533',
@@ -240,8 +69,76 @@ class NRKIE(NRKBaseIE):
}, {
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
'only_matching': True,
+ }, {
+ 'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
+ 'only_matching': True,
}]
+ def _extract_from_playback(self, video_id):
+ manifest = self._download_json(
+ 'http://psapi.nrk.no/playback/manifest/%s' % video_id,
+ video_id, 'Downloading manifest JSON')
+
+ playable = manifest['playable']
+
+ formats = []
+ for asset in playable['assets']:
+ if not isinstance(asset, dict):
+ continue
+ if asset.get('encrypted'):
+ continue
+ format_url = url_or_none(asset.get('url'))
+ if not format_url:
+ continue
+ if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ data = self._download_json(
+ 'http://psapi.nrk.no/playback/metadata/%s' % video_id,
+ video_id, 'Downloading metadata JSON')
+
+ preplay = data['preplay']
+ titles = preplay['titles']
+ title = titles['title']
+ alt_title = titles.get('subtitle')
+
+ description = preplay.get('description')
+ duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
+
+ thumbnails = []
+ for image in try_get(
+ preplay, lambda x: x['poster']['images'], list) or []:
+ if not isinstance(image, dict):
+ continue
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('pixelWidth')),
+ 'height': int_or_none(image.get('pixelHeight')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': alt_title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_from_playback(video_id)
+
class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio'
@@ -380,6 +277,181 @@ class NRKTVIE(NRKBaseIE):
'only_matching': True,
}]
+ _api_host = None
+
+ def _extract_from_mediaelement(self, video_id):
+ api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
+
+ for api_host in api_hosts:
+ data = self._download_json(
+ 'http://%s/mediaelement/%s' % (api_host, video_id),
+ video_id, 'Downloading mediaelement JSON',
+ fatal=api_host == api_hosts[-1])
+ if not data:
+ continue
+ self._api_host = api_host
+ break
+
+ title = data.get('fullTitle') or data.get('mainTitle') or data['title']
+ video_id = data.get('id') or video_id
+
+ entries = []
+
+ conviva = data.get('convivaStatistics') or {}
+ live = (data.get('mediaElementType') == 'Live'
+ or data.get('isLive') is True or conviva.get('isLive'))
+
+ def make_title(t):
+ return self._live_title(t) if live else t
+
+ media_assets = data.get('mediaAssets')
+ if media_assets and isinstance(media_assets, list):
+ def video_id_and_title(idx):
+ return ((video_id, title) if len(media_assets) == 1
+ else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
+ for num, asset in enumerate(media_assets, 1):
+ asset_url = asset.get('url')
+ if not asset_url:
+ continue
+ formats = self._extract_akamai_formats(asset_url, video_id)
+ if not formats:
+ continue
+ self._sort_formats(formats)
+
+ # Some f4m streams may not work with hdcore in fragments' URLs
+ for f in formats:
+ extra_param = f.get('extra_param_to_segment_url')
+ if extra_param and 'hdcore' in extra_param:
+ del f['extra_param_to_segment_url']
+
+ entry_id, entry_title = video_id_and_title(num)
+ duration = parse_duration(asset.get('duration'))
+ subtitles = {}
+ for subtitle in ('webVtt', 'timedText'):
+ subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
+ if subtitle_url:
+ subtitles.setdefault('no', []).append({
+ 'url': compat_urllib_parse_unquote(subtitle_url)
+ })
+ entries.append({
+ 'id': asset.get('carrierId') or entry_id,
+ 'title': make_title(entry_title),
+ 'duration': duration,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ })
+
+ if not entries:
+ media_url = data.get('mediaUrl')
+ if media_url:
+ formats = self._extract_akamai_formats(media_url, video_id)
+ self._sort_formats(formats)
+ duration = parse_duration(data.get('duration'))
+ entries = [{
+ 'id': video_id,
+ 'title': make_title(title),
+ 'duration': duration,
+ 'formats': formats,
+ }]
+
+ if not entries:
+ MESSAGES = {
+ 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
+ 'ProgramRightsHasExpired': 'Programmet har gått ut',
+ 'NoProgramRights': 'Ikke tilgjengelig',
+ 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
+ }
+ message_type = data.get('messageType', '')
+ # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+ if 'IsGeoBlocked' in message_type:
+ self.raise_geo_restricted(
+ msg=MESSAGES.get('ProgramIsGeoBlocked'),
+ countries=self._GEO_COUNTRIES)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, MESSAGES.get(
+ message_type, message_type)),
+ expected=True)
+
+ series = conviva.get('seriesName') or data.get('seriesTitle')
+ episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
+
+ season_number = None
+ episode_number = None
+ if data.get('mediaElementType') == 'Episode':
+ _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
+ data.get('relativeOriginUrl', '')
+ EPISODENUM_RE = [
+ r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
+ r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
+ ]
+ season_number = int_or_none(self._search_regex(
+ EPISODENUM_RE, _season_episode, 'season number',
+ default=None, group='season'))
+ episode_number = int_or_none(self._search_regex(
+ EPISODENUM_RE, _season_episode, 'episode number',
+ default=None, group='episode'))
+
+ thumbnails = None
+ images = data.get('images')
+ if images and isinstance(images, dict):
+ web_images = images.get('webImages')
+ if isinstance(web_images, list):
+ thumbnails = [{
+ 'url': image['imageUrl'],
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in web_images if image.get('imageUrl')]
+
+ description = data.get('description')
+ category = data.get('mediaAnalytics', {}).get('category')
+
+ common_info = {
+ 'description': description,
+ 'series': series,
+ 'episode': episode,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'categories': [category] if category else None,
+ 'age_limit': parse_age_limit(data.get('legalAge')),
+ 'thumbnails': thumbnails,
+ }
+
+ vcodec = 'none' if data.get('mediaType') == 'Audio' else None
+
+ for entry in entries:
+ entry.update(common_info)
+ for f in entry['formats']:
+ f['vcodec'] = vcodec
+
+ points = data.get('shortIndexPoints')
+ if isinstance(points, list):
+ chapters = []
+ for next_num, point in enumerate(points, start=1):
+ if not isinstance(point, dict):
+ continue
+ start_time = parse_duration(point.get('startPoint'))
+ if start_time is None:
+ continue
+ end_time = parse_duration(
+ data.get('duration')
+ if next_num == len(points)
+ else points[next_num].get('startPoint'))
+ if end_time is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': point.get('title'),
+ })
+ if chapters and len(entries) == 1:
+ entries[0]['chapters'] = chapters
+
+ return self.playlist_result(entries, video_id, title, description)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self._extract_from_mediaelement(video_id)
+
class NRKTVEpisodeIE(InfoExtractor):
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
diff --git a/youtube_dlc/extractor/spreaker.py b/youtube_dlc/extractor/spreaker.py
new file mode 100644
index 000000000..beee6670c
--- /dev/null
+++ b/youtube_dlc/extractor/spreaker.py
@@ -0,0 +1,176 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+def _extract_episode(data, episode_id=None):
+ title = data['title']
+ download_url = data['download_url']
+
+ series = try_get(data, lambda x: x['show']['title'], compat_str)
+ uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
+
+ thumbnails = []
+ for image in ('image_original', 'image_medium', 'image'):
+ image_url = url_or_none(data.get('%s_url' % image))
+ if image_url:
+ thumbnails.append({'url': image_url})
+
+ def stats(key):
+ return int_or_none(try_get(
+ data,
+ (lambda x: x['%ss_count' % key],
+ lambda x: x['stats']['%ss' % key])))
+
+ def duration(key):
+ return float_or_none(data.get(key), scale=1000)
+
+ return {
+ 'id': compat_str(episode_id or data['episode_id']),
+ 'url': download_url,
+ 'display_id': data.get('permalink'),
+ 'title': title,
+ 'description': data.get('description'),
+ 'timestamp': unified_timestamp(data.get('published_at')),
+ 'uploader': uploader,
+ 'uploader_id': str_or_none(data.get('author_id')),
+ 'creator': uploader,
+ 'duration': duration('duration') or duration('length'),
+ 'view_count': stats('play'),
+ 'like_count': stats('like'),
+ 'comment_count': stats('message'),
+ 'format': 'MPEG Layer 3',
+ 'format_id': 'mp3',
+ 'container': 'mp3',
+ 'ext': 'mp3',
+ 'thumbnails': thumbnails,
+ 'series': series,
+ 'extractor_key': SpreakerIE.ie_key(),
+ }
+
+
+class SpreakerIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ api\.spreaker\.com/
+ (?:
+ (?:download/)?episode|
+ v2/episodes
+ )/
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://api.spreaker.com/episode/12534508',
+ 'info_dict': {
+ 'id': '12534508',
+ 'display_id': 'swm-ep15-how-to-market-your-music-part-2',
+ 'ext': 'mp3',
+ 'title': 'EP:15 | Music Marketing (Likes) - Part 2',
+ 'description': 'md5:0588c43e27be46423e183076fa071177',
+ 'timestamp': 1502250336,
+ 'upload_date': '20170809',
+ 'uploader': 'SWM',
+ 'uploader_id': '9780658',
+ 'duration': 1063.42,
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'series': 'Success With Music (SWM)',
+ },
+ }, {
+ 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ episode_id = self._match_id(url)
+ data = self._download_json(
+ 'https://api.spreaker.com/v2/episodes/%s' % episode_id,
+ episode_id)['response']['episode']
+ return _extract_episode(data, episode_id)
+
+
+class SpreakerPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ episode_id = self._search_regex(
+ (r'data-episode_id=["\'](?P<id>\d+)',
+ r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
+ return self.url_result(
+ 'https://api.spreaker.com/episode/%s' % episode_id,
+ ie=SpreakerIE.ie_key(), video_id=episode_id)
+
+
+class SpreakerShowIE(InfoExtractor):
+ _VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.spreaker.com/show/3-ninjas-podcast',
+ 'info_dict': {
+ 'id': '4652058',
+ },
+ 'playlist_mincount': 118,
+ }]
+
+ def _entries(self, show_id):
+ for page_num in itertools.count(1):
+ episodes = self._download_json(
+ 'https://api.spreaker.com/show/%s/episodes' % show_id,
+ show_id, note='Downloading JSON page %d' % page_num, query={
+ 'page': page_num,
+ 'max_per_page': 100,
+ })
+ pager = try_get(episodes, lambda x: x['response']['pager'], dict)
+ if not pager:
+ break
+ results = pager.get('results')
+ if not results or not isinstance(results, list):
+ break
+ for result in results:
+ if not isinstance(result, dict):
+ continue
+ yield _extract_episode(result)
+ if page_num == pager.get('last_page'):
+ break
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+ return self.playlist_result(self._entries(show_id), playlist_id=show_id)
+
+
+class SpreakerShowPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.spreaker.com/show/success-with-music',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ show_id = self._search_regex(
+ r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
+ return self.url_result(
+ 'https://api.spreaker.com/show/%s' % show_id,
+ ie=SpreakerShowIE.ie_key(), video_id=show_id)
diff --git a/youtube_dlc/extractor/viki.py b/youtube_dlc/extractor/viki.py
index 2e3794344..09da4338d 100644
--- a/youtube_dlc/extractor/viki.py
+++ b/youtube_dlc/extractor/viki.py
@@ -21,6 +21,7 @@ from ..utils import (
parse_age_limit,
parse_iso8601,
sanitized_Request,
+ std_headers,
)
@@ -227,8 +228,10 @@ class VikiIE(VikiBaseIE):
resp = self._download_json(
'https://www.viki.com/api/videos/' + video_id,
- video_id, 'Downloading video JSON',
- headers={'x-viki-app-ver': '4.0.57'})
+ video_id, 'Downloading video JSON', headers={
+ 'x-client-user-agent': std_headers['User-Agent'],
+ 'x-viki-app-ver': '4.0.57',
+ })
video = resp['video']
self._check_errors(video)
diff --git a/youtube_dlc/extractor/vlive.py b/youtube_dlc/extractor/vlive.py
index 935560b57..223709b1e 100644
--- a/youtube_dlc/extractor/vlive.py
+++ b/youtube_dlc/extractor/vlive.py
@@ -1,55 +1,50 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-import time
import itertools
+import json
-from .common import InfoExtractor
from .naver import NaverBaseIE
-from ..compat import compat_str
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
from ..utils import (
ExtractorError,
+ int_or_none,
merge_dicts,
+ str_or_none,
+ strip_or_none,
try_get,
urlencode_postdata,
)
-class VLiveIE(NaverBaseIE):
+class VLiveBaseIE(NaverBaseIE):
+ _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+
+
+class VLiveIE(VLiveBaseIE):
IE_NAME = 'vlive'
- _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
_NETRC_MACHINE = 'vlive'
_TESTS = [{
- 'url': 'https://www.vlive.tv/video/1326',
+ 'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
- 'title': "[V LIVE] Girl's Day's Broadcast",
+ 'title': "Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
},
- },
- {
- 'url': 'https://vlive.tv/post/1-18244258',
- 'md5': 'cc7314812855ce56de70a06a27314983',
- 'info_dict': {
- 'id': '1326',
- 'ext': 'mp4',
- 'title': "[V LIVE] Girl's Day's Broadcast",
- 'creator': "Girl's Day",
- 'view_count': int,
- 'uploader_id': 'muploader_a',
- },
- },
- {
- 'url': 'https://www.vlive.tv/video/16937',
+ }, {
+ 'url': 'http://www.vlive.tv/video/16937',
'info_dict': {
'id': '16937',
'ext': 'mp4',
- 'title': '[V LIVE] 첸백시 걍방',
+ 'title': '첸백시 걍방',
'creator': 'EXO',
'view_count': int,
'subtitles': 'mincount:12',
@@ -70,12 +65,15 @@ class VLiveIE(NaverBaseIE):
'subtitles': 'mincount:10',
},
'skip': 'This video is only available for CH+ subscribers',
+ }, {
+ 'url': 'https://www.vlive.tv/embed/1326',
+ 'only_matching': True,
+ }, {
+ # works only with gcc=KR
+ 'url': 'https://www.vlive.tv/video/225019',
+ 'only_matching': True,
}]
- @classmethod
- def suitable(cls, url):
- return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
-
def _real_initialize(self):
self._login()
@@ -107,118 +105,159 @@ class VLiveIE(NaverBaseIE):
if not is_logged_in():
raise ExtractorError('Unable to log in', expected=True)
- def _real_extract(self, url):
- # url may match on a post or a video url with a post_id potentially matching a video_id
- working_id = self._match_id(url)
- webpage = self._download_webpage(url, working_id)
-
- PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*'
- PARAMS_FIELD = 'params'
-
- params = self._search_regex(
- PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL)
- params = self._parse_json(params, working_id, fatal=False)
-
- video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict)
-
- if video_params is None:
- error = try_get(params, lambda x: x["postDetail"]["error"], dict)
- error_data = try_get(error, lambda x: x["data"], dict)
- error_video = try_get(error_data, lambda x: x["officialVideo"], dict)
- error_msg = try_get(error, lambda x: x["message"], compat_str)
- product_type = try_get(error_data,
- [lambda x: x["officialVideo"]["productType"],
- lambda x: x["board"]["boardType"]],
- compat_str)
-
- if error_video is not None:
- if product_type in ('VLIVE_PLUS', 'VLIVE+'):
- self.raise_login_required('This video is only available with V LIVE+.')
- elif error_msg is not None:
- raise ExtractorError('V LIVE reported the following error: %s' % error_msg)
- else:
- raise ExtractorError('Failed to extract video parameters.')
- elif 'post' in url:
- raise ExtractorError('Url does not appear to be a video post.', expected=True)
- else:
- raise ExtractorError('Failed to extract video parameters.')
-
- video_id = working_id if 'video' in url else str(video_params["videoSeq"])
+ def _call_api(self, path_template, video_id, fields=None):
+ query = {'appId': self._APP_ID, 'gcc': 'KR'}
+ if fields:
+ query['fields'] = fields
+ try:
+ return self._download_json(
+ 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+ 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
+ headers={'Referer': 'https://www.vlive.tv/'}, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_login_required(json.loads(e.cause.read().decode())['message'])
+ raise
- video_type = video_params["type"]
- if video_type in ('VOD'):
- encoding_status = video_params["encodingStatus"]
- if encoding_status == 'COMPLETE':
- return self._replay(video_id, webpage, params, video_params)
- else:
- raise ExtractorError('VOD encoding not yet complete. Please try again later.',
- expected=True)
- elif video_type in ('LIVE'):
- video_status = video_params["status"]
- if video_status in ('RESERVED'):
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ post = self._call_api(
+ 'post/v1.0/officialVideoPost-%s', video_id,
+ 'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
+
+ video = post['officialVideo']
+
+ def get_common_fields():
+ channel = post.get('channel') or {}
+ return {
+ 'title': video.get('title'),
+ 'creator': post.get('author', {}).get('nickname'),
+ 'channel': channel.get('channelName'),
+ 'channel_id': channel.get('channelCode'),
+ 'duration': int_or_none(video.get('playTime')),
+ 'view_count': int_or_none(video.get('playCount')),
+ 'like_count': int_or_none(video.get('likeCount')),
+ 'comment_count': int_or_none(video.get('commentCount')),
+ }
+
+ video_type = video.get('type')
+ if video_type == 'VOD':
+ inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
+ vod_id = video['vodId']
+ return merge_dicts(
+ get_common_fields(),
+ self._extract_video_info(video_id, vod_id, inkey))
+ elif video_type == 'LIVE':
+ status = video.get('status')
+ if status == 'ON_AIR':
+ stream_url = self._call_api(
+ 'old/v3/live/%s/playInfo',
+ video_id)['result']['adaptiveStreamUrl']
+ formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
+ info = get_common_fields()
+ info.update({
+ 'title': self._live_title(video['title']),
+ 'id': video_id,
+ 'formats': formats,
+ 'is_live': True,
+ })
+ return info
+ elif status == 'ENDED':
+ raise ExtractorError(
+ 'Uploading for replay. Please wait...', expected=True)
+ elif status == 'RESERVED':
raise ExtractorError('Coming soon!', expected=True)
- elif video_status in ('ENDED', 'END'):
- raise ExtractorError('Uploading for replay. Please wait...', expected=True)
+ elif video.get('exposeStatus') == 'CANCEL':
+ raise ExtractorError(
+ 'We are sorry, but the live broadcast has been canceled.',
+ expected=True)
else:
- return self._live(video_id, webpage, params)
- else:
- raise ExtractorError('Unknown video type %s' % video_type)
-
- def _get_common_fields(self, webpage, params):
- title = self._og_search_title(webpage)
- description = self._html_search_meta(
- ['og:description', 'description', 'twitter:description'],
- webpage, 'description', default=None)
- creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
- or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
- thumbnail = self._og_search_thumbnail(webpage)
- return {
- 'title': title,
- 'creator': creator,
- 'thumbnail': thumbnail,
- }
-
- def _live(self, video_id, webpage, params):
- LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
- play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id,
- headers={"referer": "https://www.vlive.tv"})
+ raise ExtractorError('Unknown status ' + status)
- streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or []
- formats = []
- for stream in streams:
- formats.extend(self._extract_m3u8_formats(
- stream['serviceUrl'], video_id, 'mp4',
- fatal=False, live=True))
- self._sort_formats(formats)
+class VLivePostIE(VLiveIE):
+ IE_NAME = 'vlive:post'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
+ _TESTS = [{
+ # uploadType = SOS
+ 'url': 'https://www.vlive.tv/post/1-20088044',
+ 'info_dict': {
+ 'id': '1-20088044',
+ 'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
+ 'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
+ },
+ 'playlist_count': 3,
+ }, {
+ # uploadType = V
+ 'url': 'https://www.vlive.tv/post/1-20087926',
+ 'info_dict': {
+ 'id': '1-20087926',
+ 'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
+ },
+ 'playlist_count': 1,
+ }]
+ _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
+ _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
+ _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
- info = self._get_common_fields(webpage, params)
- info.update({
- 'title': self._live_title(info['title']),
- 'id': video_id,
- 'formats': formats,
- 'is_live': True,
- })
- return info
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
- def _replay(self, video_id, webpage, params, video_params):
- long_video_id = video_params["vodId"]
+ post = self._call_api(
+ 'post/v1.0/post-%s', post_id,
+ 'attachments{video},officialVideo{videoSeq},plainBody,title')
- VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id
- key_json = self._download_json(VOD_KEY_ENDPOINT, video_id,
- headers={"referer": "https://www.vlive.tv"})
- key = key_json["inkey"]
+ video_seq = str_or_none(try_get(
+ post, lambda x: x['officialVideo']['videoSeq']))
+ if video_seq:
+ return self.url_result(
+ 'http://www.vlive.tv/video/' + video_seq,
+ VLiveIE.ie_key(), video_seq)
- return merge_dicts(
- self._get_common_fields(webpage, params),
- self._extract_video_info(video_id, long_video_id, key))
+ title = post['title']
+ entries = []
+ for idx, video in enumerate(post['attachments']['video'].values()):
+ video_id = video.get('videoId')
+ if not video_id:
+ continue
+ upload_type = video.get('uploadType')
+ upload_info = video.get('uploadInfo') or {}
+ entry = None
+ if upload_type == 'SOS':
+ download = self._call_api(
+ self._SOS_TMPL, video_id)['videoUrl']['download']
+ formats = []
+ for f_id, f_url in download.items():
+ formats.append({
+ 'format_id': f_id,
+ 'url': f_url,
+ 'height': int_or_none(f_id[:-1]),
+ })
+ self._sort_formats(formats)
+ entry = {
+ 'formats': formats,
+ 'id': video_id,
+ 'thumbnail': upload_info.get('imageUrl'),
+ }
+ elif upload_type == 'V':
+ vod_id = upload_info.get('videoId')
+ if not vod_id:
+ continue
+ inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
+ entry = self._extract_video_info(video_id, vod_id, inkey)
+ if entry:
+ entry['title'] = '%s_part%s' % (title, idx)
+ entries.append(entry)
+ return self.playlist_result(
+ entries, post_id, title, strip_or_none(post.get('plainBody')))
-class VLiveChannelIE(InfoExtractor):
+class VLiveChannelIE(VLiveBaseIE):
IE_NAME = 'vlive:channel'
- _VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)'
+ _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
_TESTS = [{
- 'url': 'https://channels.vlive.tv/FCD4B',
+ 'url': 'http://channels.vlive.tv/FCD4B',
'info_dict': {
'id': 'FCD4B',
'title': 'MAMAMOO',
@@ -226,63 +265,39 @@ class VLiveChannelIE(InfoExtractor):
'playlist_mincount': 110
}, {
'url': 'https://www.vlive.tv/channel/FCD4B',
- 'info_dict': {
- 'id': 'FCD4B',
- 'title': 'MAMAMOO',
- },
- 'playlist_mincount': 110
+ 'only_matching': True,
}]
- _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+
+ def _call_api(self, path, channel_key_suffix, channel_value, note, query):
+ q = {
+ 'app_id': self._APP_ID,
+ 'channel' + channel_key_suffix: channel_value,
+ }
+ q.update(query)
+ return self._download_json(
+ 'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
+ channel_value, note='Downloading ' + note, query=q)['result']
def _real_extract(self, url):
channel_code = self._match_id(url)
- webpage = self._download_webpage(
- 'http://channels.vlive.tv/%s/video' % channel_code, channel_code)
-
- app_id = None
-
- app_js_url = self._search_regex(
- r'