mirror of https://github.com/yt-dlp/yt-dlp
Update to ytdl-commit-cf2dbec
pull/103/head
cf2dbec630
Except: [kakao] improve info extraction and detect geo restriction (d8085580f6)
parent
5e41dca334
commit
bc2ca1bb75
@ -0,0 +1,193 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
unified_strdate,
|
||||||
|
xpath_text,
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DreiSatIE(InfoExtractor):
    """Extractor for 3sat Mediathek videos addressed by a numeric ``obj`` id.

    Metadata and format lists are read from the ``beitragsDetails`` XML
    service; SMIL, HLS (m3u8) and HDS (f4m) manifests referenced from that
    document are expanded into individual formats.
    """

    IE_NAME = '3sat'
    _GEO_COUNTRIES = ['DE']
    _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
            'md5': 'be37228896d30a88f315b638900a026e',
            'info_dict': {
                'id': '45918',
                'ext': 'mp4',
                'title': 'Waidmannsheil',
                'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
                'uploader': 'SCHWEIZWEIT',
                'uploader_id': '100000210',
                'upload_date': '20140913'
            },
            'params': {
                'skip_download': True,  # m3u8 downloads
            }
        },
        {
            'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
            'only_matching': True,
        },
    ]

    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
        """Build one format per (video, protocol) pair described by a SMIL document.

        Connection parameters (``protocols``, ``host``, ``app``) are looked up
        in the ``<paramGroup>`` that each ``<video>`` element refers to.
        Videos without a ``src``, or whose parameter group is missing or lacks
        ``protocols``/``host``, are skipped instead of raising.

        Returns the sorted list of format dicts.
        """
        # Map each paramGroup's xml:id to its {name: value} parameters.
        param_groups = {}
        for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
            group_id = param_group.get(self._xpath_ns(
                'id', 'http://www.w3.org/XML/1998/namespace'))
            param_groups[group_id] = dict(
                (param.get('name'), param.get('value'))
                for param in param_group)

        formats = []
        for video in smil.findall(self._xpath_ns('.//video', namespace)):
            src = video.get('src')
            if not src:
                continue
            # Prefer the bitrate embedded in the file name ("_1234k"), then
            # fall back to the SMIL system bitrate attribute (scaled by 1000).
            bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
            # A video may reference no group, or one that was never declared.
            param_group = param_groups.get(video.get('paramGroup')) or {}
            protocols = param_group.get('protocols')
            host = param_group.get('host')
            if not protocols or not host:
                continue
            for proto in protocols.split(','):
                formats.append({
                    'url': '%s://%s' % (proto, host),
                    'app': param_group.get('app'),
                    'play_path': src,
                    'ext': 'flv',
                    # '%d' cannot format None, so omit the suffix when the
                    # bitrate is unknown.
                    'format_id': '%s-%d' % (proto, bitrate) if bitrate is not None else proto,
                    'tbr': bitrate,
                })
        self._sort_formats(formats)
        return formats

    def extract_from_xml_url(self, video_id, xml_url):
        """Download the details XML at *xml_url* and turn it into an info dict.

        Raises ExtractorError (expected) when the service reports a status
        code other than ``ok``, and a geo-restriction error when no format
        could be extracted and the document declares a geolocation other
        than ``none``.
        """
        doc = self._download_xml(
            xml_url, video_id,
            note='Downloading video info',
            errnote='Failed to download video info')

        status_code = xpath_text(doc, './status/statuscode')
        if status_code and status_code != 'ok':
            if status_code == 'notVisibleAnymore':
                message = 'Video %s is not available' % video_id
            else:
                message = '%s returned error: %s' % (self.IE_NAME, status_code)
            raise ExtractorError(message, expected=True)

        title = xpath_text(doc, './/information/title', 'title', True)

        seen_urls = set()
        formats = []
        for fnode in doc.findall('.//formitaeten/formitaet'):
            video_url = xpath_text(fnode, 'url')
            if not video_url or video_url in seen_urls:
                continue
            seen_urls.add(video_url)

            is_available = 'http://www.metafilegenerator' not in video_url
            geoloced = 'static_geoloced_online' in video_url
            if not is_available or geoloced:
                continue

            # basetype is expected to look like
            # vcodec_acodec_container_proto_index_indexproto; tolerate a
            # missing or differently shaped attribute.
            format_id = fnode.get('basetype')
            format_m = re.match(r'''(?x)
                (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
                (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
            ''', format_id or '')
            basetype = format_m.groupdict() if format_m else {}

            ext = determine_ext(video_url, None) or basetype.get('container')

            if ext == 'meta':
                continue
            elif ext == 'smil':
                formats.extend(self._extract_smil_formats(
                    video_url, video_id, fatal=False))
            elif ext == 'm3u8':
                # the certificates are misconfigured (see
                # https://github.com/ytdl-org/youtube-dl/issues/8665)
                if video_url.startswith('https://'):
                    continue
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id=format_id, fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    video_url, video_id, f4m_id=format_id, fatal=False))
            else:
                quality = xpath_text(fnode, './quality')
                if quality:
                    format_id = '%s-%s' % (format_id, quality) if format_id else quality

                abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
                vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)

                tbr = int_or_none(self._search_regex(
                    r'_(\d+)k', video_url, 'bitrate', None))
                # Derive the audio bitrate when only total and video are known.
                if tbr and vbr and not abr:
                    abr = tbr - vbr

                proto = basetype.get('proto')
                formats.append({
                    'format_id': format_id,
                    'url': video_url,
                    'ext': ext,
                    'acodec': basetype.get('acodec'),
                    'vcodec': basetype.get('vcodec'),
                    'abr': abr,
                    'vbr': vbr,
                    'tbr': tbr,
                    'width': int_or_none(xpath_text(fnode, './width')),
                    'height': int_or_none(xpath_text(fnode, './height')),
                    'filesize': int_or_none(xpath_text(fnode, './filesize')),
                    'protocol': proto.lower() if proto else None,
                })

        geolocation = xpath_text(doc, './/details/geolocation')
        if not formats and geolocation and geolocation != 'none':
            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)

        self._sort_formats(formats)

        thumbnails = []
        for node in doc.findall('.//teaserimages/teaserimage'):
            thumbnail_url = node.text
            if not thumbnail_url:
                continue
            thumbnail = {
                'url': thumbnail_url,
            }
            # The key attribute carries the size as "<width>x<height>".
            thumbnail_key = node.get('key')
            if thumbnail_key:
                m = re.match(r'^([0-9]+)x([0-9]+)$', thumbnail_key)
                if m:
                    thumbnail['width'] = int(m.group(1))
                    thumbnail['height'] = int(m.group(2))
            thumbnails.append(thumbnail)

        upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))

        return {
            'id': video_id,
            'title': title,
            'description': xpath_text(doc, './/information/detail'),
            'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
            'thumbnails': thumbnails,
            'uploader': xpath_text(doc, './/details/originChannelTitle'),
            'uploader_id': xpath_text(doc, './/details/originChannelId'),
            'upload_date': upload_date,
            'formats': formats,
        }

    def _real_extract(self, url):
        """Resolve the video id from *url* and extract it via the details XML service."""
        video_id = self._match_id(url)
        details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
        return self.extract_from_xml_url(video_id, details_url)
|
@ -0,0 +1,160 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_podcast_url,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SimplecastBaseIE(InfoExtractor):
    """Shared API helpers and episode parsing for the Simplecast extractors."""

    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _API_BASE = 'https://api.simplecast.com/'

    def _call_api(self, path_tmpl, video_id):
        """Fetch JSON from the API path built by filling *path_tmpl* with *video_id*."""
        return self._download_json(
            self._API_BASE + path_tmpl % video_id, video_id)

    def _call_search_api(self, resource, resource_id, resource_url):
        """POST *resource_url* to the ``<resource>s/search`` endpoint and return the JSON.

        *resource_id* is only passed to the downloader as the id argument.
        """
        return self._download_json(
            # Use the shared base so the host is defined in one place.
            self._API_BASE + '%ss/search' % resource, resource_id,
            data=urlencode_postdata({'url': resource_url}))

    def _parse_episode(self, episode):
        """Convert an episode JSON object into an info dict.

        ``id`` and ``title`` are required, as is at least one of the audio URL
        fields (``audio_file.url``, ``audio_file_url`` or ``enclosure_url``);
        every other field is optional.
        """
        episode_id = episode['id']
        title = episode['title'].strip()
        audio_file = episode.get('audio_file') or {}
        audio_file_url = audio_file.get('url') or episode.get('audio_file_url') or episode['enclosure_url']

        season = episode.get('season') or {}
        season_href = season.get('href')
        season_id = None
        if season_href:
            # Dots are escaped so only the literal API host is accepted.
            season_id = self._search_regex(
                r'https?://api\.simplecast\.com/seasons/(%s)' % self._UUID_REGEX,
                season_href, 'season id', default=None)

        webpage_url = episode.get('episode_url')
        channel_url = None
        if webpage_url:
            # The channel URL is the scheme plus the *.simplecast.com host
            # of the episode page.
            channel_url = self._search_regex(
                r'(https?://[^/]+\.simplecast\.com)',
                webpage_url, 'channel url', default=None)

        return {
            'id': episode_id,
            'display_id': episode.get('slug'),
            'title': title,
            'url': clean_podcast_url(audio_file_url),
            'webpage_url': webpage_url,
            'channel_url': channel_url,
            'series': try_get(episode, lambda x: x['podcast']['title']),
            'season_number': int_or_none(season.get('number')),
            'season_id': season_id,
            'thumbnail': episode.get('image_url'),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode.get('number')),
            'description': strip_or_none(episode.get('description')),
            'timestamp': parse_iso8601(episode.get('published_at')),
            'duration': int_or_none(episode.get('duration')),
            'filesize': int_or_none(audio_file.get('size') or episode.get('audio_file_size')),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class SimplecastIE(SimplecastBaseIE):
    """Extractor for single episodes addressed by UUID on the API or player host."""

    IE_NAME = 'simplecast'
    _VALID_URL = r'https?://(?:api\.simplecast\.com/episodes|player\.simplecast\.com)/(?P<id>%s)' % SimplecastBaseIE._UUID_REGEX
    # Expected metadata, reused by the episode-page extractor's test.
    _COMMON_TEST_INFO = {
        'display_id': 'errant-signal-chris-franklin-new-wave-video-essays',
        'id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'ext': 'mp3',
        'title': 'Errant Signal - Chris Franklin & New Wave Video Essays',
        'episode_number': 1,
        'episode_id': 'b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'description': 'md5:34752789d3d2702e2d2c975fbd14f357',
        'season_number': 1,
        'season_id': 'e23df0da-bae4-4531-8bbf-71364a88dc13',
        'series': 'The RE:BIND.io Podcast',
        'duration': 5343,
        'timestamp': 1580979475,
        'upload_date': '20200206',
        'webpage_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'channel_url': r're:^https?://the-re-bind-io-podcast\.simplecast\.com$',
    }
    _TESTS = [{
        'url': 'https://api.simplecast.com/episodes/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': _COMMON_TEST_INFO,
    }, {
        'url': 'https://player.simplecast.com/b6dc49a2-9404-4853-9aa9-9cfc097be876',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the src URLs of all Simplecast embed/player iframes found in *webpage*."""
        iframe_src_re = r'''(?x)<iframe[^>]+src=["\']
            (
                https?://(?:embed\.simplecast\.com/[0-9a-f]{8}|
                player\.simplecast\.com/%s
            ))''' % SimplecastBaseIE._UUID_REGEX
        return re.findall(iframe_src_re, webpage)

    def _real_extract(self, url):
        """Look the episode up by its UUID and parse the API response."""
        uuid = self._match_id(url)
        return self._parse_episode(self._call_api('episodes/%s', uuid))
|
||||||
|
|
||||||
|
|
||||||
|
class SimplecastEpisodeIE(SimplecastBaseIE):
    """Extractor for episode pages on a podcast's ``*.simplecast.com`` site."""

    IE_NAME = 'simplecast:episode'
    _VALID_URL = r'https?://(?!api\.)[^/]+\.simplecast\.com/episodes/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes/errant-signal-chris-franklin-new-wave-video-essays',
        'md5': '8c93be7be54251bf29ee97464eabe61c',
        'info_dict': SimplecastIE._COMMON_TEST_INFO,
    }

    def _real_extract(self, url):
        """Resolve the episode through the search API using the matched page URL."""
        matched = re.match(self._VALID_URL, url)
        # Group 1 is the episode slug; group 0 is the URL portion that matched.
        slug, episode_url = matched.group(1), matched.group(0)
        return self._parse_episode(
            self._call_search_api('episode', slug, episode_url))
|
||||||
|
|
||||||
|
|
||||||
|
class SimplecastPodcastIE(SimplecastBaseIE):
    """Playlist extractor for a whole podcast hosted on ``<name>.simplecast.com``."""

    IE_NAME = 'simplecast:podcast'
    _VALID_URL = r'https?://(?!(?:api|cdn|embed|feeds|player)\.)(?P<id>[^/]+)\.simplecast\.com(?!/episodes/[^/?&#]+)'
    _TESTS = [{
        'url': 'https://the-re-bind-io-podcast.simplecast.com',
        'playlist_mincount': 33,
        'info_dict': {
            'id': '07d28d26-7522-42eb-8c53-2bdcfc81c43c',
            'title': 'The RE:BIND.io Podcast',
        },
    }, {
        'url': 'https://the-re-bind-io-podcast.simplecast.com/episodes',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Find the podcast behind the site URL and return its episodes as a playlist."""
        site_name = self._match_id(url)
        podcast = self._call_search_api('site', site_name, url)['podcast']
        podcast_id = podcast['id']
        podcast_title = podcast.get('title')

        def entries():
            # The episode listing is requested only once iteration starts.
            listing = self._call_api('podcasts/%s/episodes', podcast_id)
            for raw_episode in (listing.get('collection') or []):
                parsed = self._parse_episode(raw_episode)
                # Every entry gets the podcast's title as its series.
                parsed['series'] = podcast_title
                yield parsed

        return self.playlist_result(entries(), podcast_id, podcast_title)
|
@ -1,255 +1,151 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import functools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
# HEADRequest,
|
||||||
|
int_or_none,
|
||||||
|
OnDemandPagedList,
|
||||||
|
smuggle_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class StoryFireBaseIE(InfoExtractor):
|
||||||
|
_VALID_URL_BASE = r'https?://(?:www\.)?storyfire\.com/'
|
||||||
|
|
||||||
|
def _call_api(self, path, video_id, resource, query=None):
|
||||||
|
return self._download_json(
|
||||||
|
'https://storyfire.com/app/%s/%s' % (path, video_id), video_id,
|
||||||
|
'Downloading %s JSON metadata' % resource, query=query)
|
||||||
|
|
||||||
|
def _parse_video(self, video):
|
||||||
|
title = video['title']
|
||||||
|
vimeo_id = self._search_regex(
|
||||||
|
r'https?://player\.vimeo\.com/external/(\d+)',
|
||||||
|
video['vimeoVideoURL'], 'vimeo id')
|
||||||
|
|
||||||
|
# video_url = self._request_webpage(
|
||||||
|
# HEADRequest(video['vimeoVideoURL']), video_id).geturl()
|
||||||
|
# formats = []
|
||||||
|
# for v_url, suffix in [(video_url, '_sep'), (video_url.replace('/sep/video/', '/video/'), '')]:
|
||||||
|
# formats.extend(self._extract_m3u8_formats(
|
||||||
|
# v_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
# m3u8_id='hls' + suffix, fatal=False))
|
||||||
|
# formats.extend(self._extract_mpd_formats(
|
||||||
|
# v_url.replace('.m3u8', '.mpd'), video_id,
|
||||||
|
# mpd_id='dash' + suffix, fatal=False))
|
||||||
|
# self._sort_formats(formats)
|
||||||
|
|
||||||
|
uploader_id = video.get('hostID')
|
||||||
|
|
||||||
class StoryFireIE(InfoExtractor):
|
return {
|
||||||
_VALID_URL = r'(?:(?:https?://(?:www\.)?storyfire\.com/video-details)|(?:https://storyfire.app.link))/(?P<id>[^/\s]+)'
|
'_type': 'url_transparent',
|
||||||
_TESTS = [{
|
'id': vimeo_id,
|
||||||
|
'title': title,
|
||||||
|
'description': video.get('description'),
|
||||||
|
'url': smuggle_url(
|
||||||
|
'https://player.vimeo.com/video/' + vimeo_id, {
|
||||||
|
'http_headers': {
|
||||||
|
'Referer': 'https://storyfire.com/',
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
# 'formats': formats,
|
||||||
|
'thumbnail': video.get('storyImage'),
|
||||||
|
'view_count': int_or_none(video.get('views')),
|
||||||
|
'like_count': int_or_none(video.get('likesCount')),
|
||||||
|
'comment_count': int_or_none(video.get('commentsCount')),
|
||||||
|
'duration': int_or_none(video.get('videoDuration')),
|
||||||
|
'timestamp': int_or_none(video.get('publishDate')),
|
||||||
|
'uploader': video.get('username'),
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'uploader_url': 'https://storyfire.com/user/%s/video' % uploader_id if uploader_id else None,
|
||||||
|
'episode_number': int_or_none(video.get('episodeNumber') or video.get('episode_number')),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class StoryFireIE(StoryFireBaseIE):
|
||||||
|
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'video-details/(?P<id>[0-9a-f]{24})'
|
||||||
|
_TEST = {
|
||||||
'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
|
'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
|
||||||
'md5': '560953bfca81a69003cfa5e53ac8a920',
|
'md5': 'caec54b9e4621186d6079c7ec100c1eb',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5df1d132b6378700117f9181',
|
'id': '378954662',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Buzzfeed Teaches You About Memes',
|
'title': 'Buzzfeed Teaches You About Memes',
|
||||||
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
||||||
'timestamp': 1576129028,
|
'timestamp': 1576129028,
|
||||||
'description': 'Mocking Buzzfeed\'s meme lesson. Reuploaded from YouTube because of their new policies',
|
'description': 'md5:0b4e28021548e144bed69bb7539e62ea',
|
||||||
'uploader': 'whang!',
|
'uploader': 'whang!',
|
||||||
'upload_date': '20191212',
|
'upload_date': '20191212',
|
||||||
|
'duration': 418,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
'params': {'format': 'bestvideo'} # There are no merged formats in the playlist.
|
'params': {
|
||||||
}, {
|
'skip_download': True,
|
||||||
'url': 'https://storyfire.app.link/5GxAvWOQr8', # Alternate URL format, with unrelated short ID
|
|
||||||
'md5': '7a2dc6d60c4889edfed459c620fe690d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5f1e11ecd78a57b6c702001d',
|
|
||||||
'ext': 'm4a',
|
|
||||||
'title': 'Weird Nintendo Prototype Leaks',
|
|
||||||
'description': 'A stream taking a look at some weird Nintendo Prototypes with Luigi in Mario 64 and weird Yoshis',
|
|
||||||
'timestamp': 1595808576,
|
|
||||||
'upload_date': '20200727',
|
|
||||||
'uploader': 'whang!',
|
|
||||||
'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
|
||||||
},
|
},
|
||||||
'params': {'format': 'bestaudio'} # Verifying audio extraction
|
'expected_warnings': ['Unable to download JSON metadata']
|
||||||
|
|
||||||
}]
|
|
||||||
|
|
||||||
_aformats = {
|
|
||||||
'audio-medium-audio': {'acodec': 'aac', 'abr': 125, 'preference': -10},
|
|
||||||
'audio-high-audio': {'acodec': 'aac', 'abr': 254, 'preference': -1},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
video = self._call_api(
|
||||||
|
'generic/video-detail', video_id, 'video')['video']
|
||||||
# Extracting the json blob is mandatory to proceed with extraction.
|
return self._parse_video(video)
|
||||||
jsontext = self._html_search_regex(
|
|
||||||
r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>',
|
|
||||||
webpage, 'json_data')
|
|
||||||
|
|
||||||
json = self._parse_json(jsontext, video_id)
|
|
||||||
|
|
||||||
# The currentVideo field in the json is mandatory
|
|
||||||
# because it contains the only link to the m3u playlist
|
|
||||||
video = json['props']['initialState']['video']['currentVideo']
|
|
||||||
videourl = video['vimeoVideoURL'] # Video URL is mandatory
|
|
||||||
|
|
||||||
# Extract other fields from the json in an error tolerant fashion
|
|
||||||
# ID may be incorrect (on short URL format), correct it.
|
|
||||||
parsed_id = video.get('_id')
|
|
||||||
if parsed_id:
|
|
||||||
video_id = parsed_id
|
|
||||||
|
|
||||||
title = video.get('title')
|
|
||||||
description = video.get('description')
|
|
||||||
|
|
||||||
thumbnail = video.get('storyImage')
|
|
||||||
views = video.get('views')
|
|
||||||
likes = video.get('likesCount')
|
|
||||||
comments = video.get('commentsCount')
|
|
||||||
duration = video.get('videoDuration')
|
|
||||||
publishdate = video.get('publishDate') # Apparently epoch time, day only
|
|
||||||
|
|
||||||
uploader = video.get('username')
|
|
||||||
uploader_id = video.get('hostID')
|
|
||||||
# Construct an uploader URL
|
|
||||||
uploader_url = None
|
|
||||||
if uploader_id:
|
|
||||||
uploader_url = "https://storyfire.com/user/%s/video" % uploader_id
|
|
||||||
|
|
||||||
# Collect root playlist to determine formats
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
videourl, video_id, 'mp4', 'm3u8_native')
|
|
||||||
|
|
||||||
# Modify formats to fill in missing information about audio codecs
|
|
||||||
for format in formats:
|
|
||||||
aformat = self._aformats.get(format['format_id'])
|
|
||||||
if aformat:
|
|
||||||
format['acodec'] = aformat['acodec']
|
|
||||||
format['abr'] = aformat['abr']
|
|
||||||
format['quality'] = aformat['preference']
|
|
||||||
format['ext'] = 'm4a'
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'ext': "mp4",
|
|
||||||
'url': videourl,
|
|
||||||
'formats': formats,
|
|
||||||
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'view_count': views,
|
|
||||||
'like_count': likes,
|
|
||||||
'comment_count': comments,
|
|
||||||
'duration': duration,
|
|
||||||
'timestamp': publishdate,
|
|
||||||
|
|
||||||
'uploader': uploader,
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'uploader_url': uploader_url,
|
|
||||||
|
|
||||||
}
|
class StoryFireUserIE(StoryFireBaseIE):
|
||||||
|
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'user/(?P<id>[^/]+)/video'
|
||||||
|
_TEST = {
|
||||||
class StoryFireUserIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?storyfire\.com/user/(?P<id>[^/\s]+)/video'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://storyfire.com/user/ntZAJFECERSgqHSxzonV5K2E89s1/video',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
|
|
||||||
'title': 'whang!',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 18
|
|
||||||
}, {
|
|
||||||
'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
|
'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
|
'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
|
||||||
'title': 'McJuggerNuggets',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 143
|
'playlist_mincount': 151,
|
||||||
|
|
||||||
}]
|
|
||||||
|
|
||||||
# Generator for fetching playlist items
|
|
||||||
def _enum_videos(self, baseurl, user_id, firstjson):
|
|
||||||
totalVideos = int(firstjson['videosCount'])
|
|
||||||
haveVideos = 0
|
|
||||||
json = firstjson
|
|
||||||
|
|
||||||
for page in itertools.count(1):
|
|
||||||
for video in json['videos']:
|
|
||||||
id = video['_id']
|
|
||||||
url = "https://storyfire.com/video-details/%s" % id
|
|
||||||
haveVideos += 1
|
|
||||||
yield {
|
|
||||||
'_type': 'url',
|
|
||||||
'id': id,
|
|
||||||
'url': url,
|
|
||||||
'ie_key': 'StoryFire',
|
|
||||||
|
|
||||||
'title': video.get('title'),
|
|
||||||
'description': video.get('description'),
|
|
||||||
'view_count': video.get('views'),
|
|
||||||
'comment_count': video.get('commentsCount'),
|
|
||||||
'duration': video.get('videoDuration'),
|
|
||||||
'timestamp': video.get('publishDate'),
|
|
||||||
}
|
}
|
||||||
# Are there more pages we could fetch?
|
_PAGE_SIZE = 20
|
||||||
if haveVideos < totalVideos:
|
|
||||||
pageurl = baseurl + ("%i" % haveVideos)
|
|
||||||
json = self._download_json(pageurl, user_id,
|
|
||||||
note='Downloading page %s' % page)
|
|
||||||
|
|
||||||
# Are there any videos in the new json?
|
def _fetch_page(self, user_id, page):
|
||||||
videos = json.get('videos')
|
videos = self._call_api(
|
||||||
if not videos or len(videos) == 0:
|
'publicVideos', user_id, 'page %d' % (page + 1), {
|
||||||
break # no videos
|
'skip': page * self._PAGE_SIZE,
|
||||||
|
})['videos']
|
||||||
else:
|
for video in videos:
|
||||||
break # We have fetched all the videos, stop
|
yield self._parse_video(video)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
user_id = self._match_id(url)
|
user_id = self._match_id(url)
|
||||||
|
entries = OnDemandPagedList(functools.partial(
|
||||||
|
self._fetch_page, user_id), self._PAGE_SIZE)
|
||||||
|
return self.playlist_result(entries, user_id)
|
||||||
|
|
||||||
baseurl = "https://storyfire.com/app/publicVideos/%s?skip=" % user_id
|
|
||||||
|
|
||||||
# Download first page to ensure it can be downloaded, and get user information if available.
|
|
||||||
firstpage = baseurl + "0"
|
|
||||||
firstjson = self._download_json(firstpage, user_id)
|
|
||||||
|
|
||||||
title = None
|
|
||||||
videos = firstjson.get('videos')
|
|
||||||
if videos and len(videos):
|
|
||||||
title = videos[1].get('username')
|
|
||||||
|
|
||||||
return {
|
class StoryFireSeriesIE(StoryFireBaseIE):
|
||||||
'_type': 'playlist',
|
_VALID_URL = StoryFireBaseIE._VALID_URL_BASE + r'write/series/stories/(?P<id>[^/?&#]+)'
|
||||||
'entries': self._enum_videos(baseurl, user_id, firstjson),
|
|
||||||
'id': user_id,
|
|
||||||
'title': title,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class StoryFireSeriesIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?storyfire\.com/write/series/stories/(?P<id>[^/\s]+)'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
|
'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '-Lq6MsuIHLODO6d2dDkr',
|
'id': '-Lq6MsuIHLODO6d2dDkr',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 13
|
'playlist_mincount': 13,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
|
'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'the_mortal_one',
|
'id': 'the_mortal_one',
|
||||||
},
|
},
|
||||||
'playlist_count': 0 # This playlist has entries, but no videos.
|
'playlist_count': 0,
|
||||||
}, {
|
|
||||||
'url': 'https://storyfire.com/write/series/stories/story_time',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'story_time',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 10
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
# Generator for returning playlist items
|
def _extract_videos(self, stories):
|
||||||
# This object is substantially different than the one in the user videos page above
|
for story in stories.values():
|
||||||
def _enum_videos(self, jsonlist):
|
if story.get('hasVideo'):
|
||||||
for video in jsonlist:
|
yield self._parse_video(story)
|
||||||
id = video['_id']
|
|
||||||
if video.get('hasVideo'): # Boolean element
|
|
||||||
url = "https://storyfire.com/video-details/%s" % id
|
|
||||||
yield {
|
|
||||||
'_type': 'url',
|
|
||||||
'id': id,
|
|
||||||
'url': url,
|
|
||||||
'ie_key': 'StoryFire',
|
|
||||||
|
|
||||||
'title': video.get('title'),
|
|
||||||
'description': video.get('description'),
|
|
||||||
'view_count': video.get('views'),
|
|
||||||
'likes_count': video.get('likesCount'),
|
|
||||||
'comment_count': video.get('commentsCount'),
|
|
||||||
'duration': video.get('videoDuration'),
|
|
||||||
'timestamp': video.get('publishDate'),
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
series_id = self._match_id(url)
|
||||||
|
stories = self._call_api(
|
||||||
listurl = "https://storyfire.com/app/seriesStories/%s/list" % list_id
|
'seriesStories', series_id, 'series stories')
|
||||||
json = self._download_json(listurl, list_id)
|
return self.playlist_result(self._extract_videos(stories), series_id)
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'entries': self._enum_videos(json),
|
|
||||||
'id': list_id
|
|
||||||
}
|
|
||||||
|
@ -0,0 +1,69 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import float_or_none, int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class ZhihuIE(InfoExtractor):
    """Extractor for Zhihu ``zvideo`` pages, backed by the v4 zvideos API."""

    _VALID_URL = r'https?://(?:www\.)?zhihu\.com/zvideo/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.zhihu.com/zvideo/1342930761977176064',
        'md5': 'c8d4c9cd72dd58e6f9bc9c2c84266464',
        'info_dict': {
            'id': '1342930761977176064',
            'ext': 'mp4',
            'title': '写春联也太难了吧!',
            'thumbnail': r're:^https?://.*\.jpg',
            'uploader': '桥半舫',
            'timestamp': 1612959715,
            'upload_date': '20210210',
            'uploader_id': '244ecb13b0fd7daf92235288c8ca3365',
            'duration': 146.333,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
        }
    }

    def _real_extract(self, url):
        """Fetch the zvideo JSON for the matched id and build the info dict."""
        video_id = self._match_id(url)
        zvideo = self._download_json(
            'https://www.zhihu.com/api/v4/zvideos/' + video_id, video_id)
        title = zvideo['title']
        video = zvideo.get('video') or {}

        # Each playlist entry is keyed by its format id; entries without a
        # playable URL are dropped.
        formats = []
        playlist = video.get('playlist') or {}
        for format_id, variant in playlist.items():
            play_url = variant.get('url') or variant.get('play_url')
            if play_url:
                formats.append({
                    'asr': int_or_none(variant.get('sample_rate')),
                    'filesize': int_or_none(variant.get('size')),
                    'format_id': format_id,
                    'fps': int_or_none(variant.get('fps')),
                    'height': int_or_none(variant.get('height')),
                    'tbr': float_or_none(variant.get('bitrate')),
                    'url': play_url,
                    'width': int_or_none(variant.get('width')),
                })
        self._sort_formats(formats)

        author = zvideo.get('author') or {}
        url_token = author.get('url_token')
        if url_token:
            uploader_url = 'https://www.zhihu.com/people/' + url_token
        else:
            uploader_url = None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': video.get('thumbnail') or zvideo.get('image_url'),
            'uploader': author.get('name'),
            'timestamp': int_or_none(zvideo.get('published_at')),
            'uploader_id': author.get('id'),
            'uploader_url': uploader_url,
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(zvideo.get('play_count')),
            'like_count': int_or_none(zvideo.get('liked_count')),
            'comment_count': int_or_none(zvideo.get('comment_count')),
        }
|
Loading…
Reference in New Issue