[ie/niconico] Fix error handling & improve metadata extraction (#13240)

Closes #13338
Authored by: doe1080
pull/13370/merge
doe1080 2 weeks ago committed by GitHub
parent 1c6068af99
commit 05e553e9d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -3,7 +3,6 @@ import functools
import itertools import itertools
import json import json
import re import re
import time
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
@ -16,12 +15,12 @@ from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_bitrate, parse_bitrate,
parse_duration,
parse_iso8601, parse_iso8601,
parse_qs, parse_qs,
parse_resolution, parse_resolution,
qualities, qualities,
str_or_none, str_or_none,
time_seconds,
truncate_string, truncate_string,
unified_timestamp, unified_timestamp,
update_url_query, update_url_query,
@ -38,8 +37,14 @@ from ..utils.traversal import (
class NiconicoBaseIE(InfoExtractor): class NiconicoBaseIE(InfoExtractor):
_API_BASE = 'https://nvapi.nicovideo.jp'
_BASE_URL = 'https://www.nicovideo.jp'
_GEO_BYPASS = False _GEO_BYPASS = False
_GEO_COUNTRIES = ['JP'] _GEO_COUNTRIES = ['JP']
_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0',
}
_LOGIN_BASE = 'https://account.nicovideo.jp' _LOGIN_BASE = 'https://account.nicovideo.jp'
_NETRC_MACHINE = 'niconico' _NETRC_MACHINE = 'niconico'
@ -99,146 +104,266 @@ class NiconicoIE(NiconicoBaseIE):
IE_NAME = 'niconico' IE_NAME = 'niconico'
IE_DESC = 'ニコニコ動画' IE_DESC = 'ニコニコ動画'
_VALID_URL = r'https?://(?:(?:embed|sp|www)\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?\d+)'
_ERROR_MAP = {
'FORBIDDEN': {
'ADMINISTRATOR_DELETE_VIDEO': 'Video unavailable, possibly removed by admins',
'CHANNEL_MEMBER_ONLY': 'Channel members only',
'DELETED_CHANNEL_VIDEO': 'Video unavailable, channel was closed',
'DELETED_COMMUNITY_VIDEO': 'Video unavailable, community deleted or missing',
'DEFAULT': 'Page unavailable, check the URL',
'HARMFUL_VIDEO': 'Sensitive content, login required',
'HIDDEN_VIDEO': 'Video unavailable, set to private',
'NOT_ALLOWED': 'No permission',
'PPV_VIDEO': 'PPV video, payment information required',
'PREMIUM_ONLY': 'Premium members only',
},
'INVALID_PARAMETER': {
'DEFAULT': 'Video unavailable, may not exist or was deleted',
},
'MAINTENANCE': {
'DEFAULT': 'Maintenance is in progress',
},
'NOT_FOUND': {
'DEFAULT': 'Video unavailable, may not exist or was deleted',
'RIGHT_HOLDER_DELETE_VIDEO': 'Removed by rights-holder request',
},
'UNAUTHORIZED': {
'DEFAULT': 'Invalid session, re-login required',
},
'UNKNOWN': {
'DEFAULT': 'Failed to fetch content',
},
}
_STATUS_MAP = {
'needs_auth': 'PPV video, payment information required',
'premium_only': 'Premium members only',
'subscriber_only': 'Channel members only',
}
_TESTS = [{ _TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215', 'url': 'https://www.nicovideo.jp/watch/1173108780',
'info_dict': { 'info_dict': {
'id': 'sm22312215', 'id': 'sm9',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Big Buck Bunny', 'title': '新・豪血寺一族 -煩悩解放 - レッツゴー!陰陽師',
'thumbnail': r're:https?://.*', 'availability': 'public',
'uploader': 'takuya0301', 'channel': '中の',
'uploader_id': '2698420', 'channel_id': '4',
'upload_date': '20131123',
'timestamp': int, # timestamp is unstable
'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
'duration': 33,
'view_count': int,
'comment_count': int, 'comment_count': int,
'description': 'md5:b7f6d3e6c29552cc19fdea6a4b7dc194',
'display_id': '1173108780',
'duration': 320,
'genres': ['未設定'], 'genres': ['未設定'],
'tags': [], 'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1173108780,
'upload_date': '20070305',
'uploader': '中の',
'uploader_id': '4',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.nicovideo.jp/watch/sm8628149',
'info_dict': {
'id': 'sm8628149',
'ext': 'mp4',
'title': '【東方】Bad Apple!!\u3000PV【影絵】',
'availability': 'public',
'channel': 'あにら',
'channel_id': '10731211',
'comment_count': int,
'description': 'md5:1999669158cb77a45bab123c4fafe1d7',
'display_id': 'sm8628149',
'duration': 219,
'genres': ['ゲーム'],
'like_count': int,
'tags': 'mincount:3',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1256580802,
'upload_date': '20091026',
'uploader': 'あにら',
'uploader_id': '10731211',
'view_count': int,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
# File downloaded with and without credentials are different, so omit 'url': 'https://www.nicovideo.jp/watch/nm14296458',
# the md5 field
'url': 'http://www.nicovideo.jp/watch/nm14296458',
'info_dict': { 'info_dict': {
'id': 'nm14296458', 'id': 'nm14296458',
'ext': 'mp4', 'ext': 'mp4',
'title': '【Kagamine Rin】Dance on media【Original】take2!', 'title': '【鏡音リン】Dance on media【オリジナル】take2!',
'availability': 'public',
'channel': 'りょうた',
'channel_id': '18822557',
'comment_count': int,
'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5', 'description': 'md5:9368f2b1f4178de64f2602c2f3d6cbf5',
'thumbnail': r're:https?://.*', 'display_id': 'nm14296458',
'duration': 208,
'genres': ['音楽・サウンド'],
'like_count': int,
'tags': 'mincount:1',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1304065916,
'upload_date': '20110429',
'uploader': 'りょうた', 'uploader': 'りょうた',
'uploader_id': '18822557', 'uploader_id': '18822557',
'upload_date': '20110429',
'timestamp': 1304065916,
'duration': 208.0,
'comment_count': int,
'view_count': int, 'view_count': int,
'genres': ['音楽・サウンド'],
'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
# 'video exists but is marked as "deleted" 'url': 'https://www.nicovideo.jp/watch/nl1872567',
# md5 is unstable
'url': 'http://www.nicovideo.jp/watch/sm10000',
'info_dict': { 'info_dict': {
'id': 'sm10000', 'id': 'nl1872567',
'ext': 'unknown_video', 'ext': 'mp4',
'description': 'deleted', 'title': '【12/25放送分】『生対談!!ひろゆきと戀塚のニコニコを作った人 』前半',
'title': 'ドラえもんエターナル第3話「決戦第3新東京市」前編', 'availability': 'public',
'thumbnail': r're:https?://.*', 'channel': 'nicolive',
'upload_date': '20071224', 'channel_id': '394',
'timestamp': int, # timestamp field has different value if logged in 'comment_count': int,
'duration': 304, 'description': 'md5:79fc3a54cfdc93ecc2b883285149e548',
'display_id': 'nl1872567',
'duration': 586,
'genres': ['エンターテイメント'],
'like_count': int,
'tags': 'mincount:3',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1198637246,
'upload_date': '20071226',
'uploader': 'nicolive',
'uploader_id': '394',
'view_count': int, 'view_count': int,
}, },
'skip': 'Requires an account', 'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'http://www.nicovideo.jp/watch/so22543406', 'url': 'https://www.nicovideo.jp/watch/so38016254',
'info_dict': { 'info_dict': {
'id': '1388129933', 'id': 'so38016254',
'ext': 'mp4', 'ext': 'mp4',
'title': '【第1回】RADIOアニメロミックス ラブライブのぞえりRadio Garden', 'title': '「のんのんびより のんすとっぷ」 PV',
'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1', 'availability': 'public',
'thumbnail': r're:https?://.*', 'channel': 'のんのんびより のんすとっぷ',
'timestamp': 1388851200, 'channel_id': 'ch2647028',
'upload_date': '20140104', 'comment_count': int,
'uploader': 'アニメロチャンネル', 'description': 'md5:6e2ff55b33e3645d59ef010869cde6a2',
'uploader_id': '312', 'display_id': 'so38016254',
'duration': 114,
'genres': ['アニメ'],
'like_count': int,
'tags': 'mincount:4',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1609146000,
'upload_date': '20201228',
'uploader': 'のんのんびより のんすとっぷ',
'uploader_id': 'ch2647028',
'view_count': int,
}, },
'skip': 'The viewing period of the video you were searching for has expired.', 'params': {'skip_download': 'm3u8'},
}, { }, {
# video not available via `getflv`; "old" HTML5 video # smile official, but marked as user video
'url': 'http://www.nicovideo.jp/watch/sm1151009', 'url': 'https://www.nicovideo.jp/watch/so37602536',
'info_dict': { 'info_dict': {
'id': 'sm1151009', 'id': 'so37602536',
'ext': 'mp4', 'ext': 'mp4',
'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)', 'title': '田中有紀とゆきだるまと! 限定放送アーカイブ第12回',
'description': 'md5:f95a3d259172667b293530cc2e41ebda', 'availability': 'subscriber_only',
'thumbnail': r're:https?://.*', 'channel': 'あみあみ16',
'duration': 184, 'channel_id': '91072761',
'timestamp': 1190835883,
'upload_date': '20070926',
'uploader': 'denden2',
'uploader_id': '1392194',
'view_count': int,
'comment_count': int, 'comment_count': int,
'genres': ['ゲーム'], 'description': 'md5:2ee357ec4e76d7804fb59af77107ab67',
'tags': [], 'display_id': 'so37602536',
'duration': 980,
'genres': ['エンターテイメント'],
'like_count': int,
'tags': 'count:4',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1601377200,
'upload_date': '20200929',
'uploader': 'あみあみ16',
'uploader_id': '91072761',
'view_count': int,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only',
}, { }, {
# "New" HTML5 video 'url': 'https://www.nicovideo.jp/watch/so41370536',
'url': 'http://www.nicovideo.jp/watch/sm31464864',
'info_dict': { 'info_dict': {
'id': 'sm31464864', 'id': 'so41370536',
'ext': 'mp4', 'ext': 'mp4',
'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質', 'title': 'ZUN【出演者別】超パーティー2022',
'description': 'md5:e52974af9a96e739196b2c1ca72b5feb', 'availability': 'premium_only',
'timestamp': 1498481660, 'channel': 'ニコニコ超会議チャンネル',
'upload_date': '20170626', 'channel_id': 'ch2607134',
'uploader': 'no-namamae',
'uploader_id': '40826363',
'thumbnail': r're:https?://.*',
'duration': 198,
'view_count': int,
'comment_count': int, 'comment_count': int,
'genres': ['アニメ'], 'description': 'md5:5692db5ac40d3a374fc5ec182d0249c3',
'tags': [], 'display_id': 'so41370536',
'duration': 63,
'genres': ['音楽・サウンド'],
'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1668394800,
'upload_date': '20221114',
'uploader': 'ニコニコ超会議チャンネル',
'uploader_id': 'ch2607134',
'view_count': int,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'Premium members only',
}, { }, {
# Video without owner 'url': 'https://www.nicovideo.jp/watch/so37574174',
'url': 'http://www.nicovideo.jp/watch/sm18238488',
'info_dict': { 'info_dict': {
'id': 'sm18238488', 'id': 'so37574174',
'ext': 'mp4', 'ext': 'mp4',
'title': '【実写版】ミュータントタートルズ', 'title': 'ひぐらしのなく頃に 廿回し編\u3000第1回',
'description': 'md5:15df8988e47a86f9e978af2064bf6d8e', 'availability': 'subscriber_only',
'timestamp': 1341128008, 'channel': '「ひぐらしのなく頃に」オフィシャルチャンネル',
'upload_date': '20120701', 'channel_id': 'ch2646036',
'thumbnail': r're:https?://.*',
'duration': 5271,
'view_count': int,
'comment_count': int, 'comment_count': int,
'genres': ['エンターテイメント'], 'description': 'md5:5296196d51d9c0b7272b73f9a99c236a',
'tags': [], 'display_id': 'so37574174',
'duration': 1931,
'genres': ['ラジオ'],
'like_count': int,
'tags': 'mincount:5',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1601028000,
'upload_date': '20200925',
'uploader': '「ひぐらしのなく頃に」オフィシャルチャンネル',
'uploader_id': 'ch2646036',
'view_count': int,
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only',
}, { }, {
'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'url': 'https://www.nicovideo.jp/watch/so44060088',
'only_matching': True, 'info_dict': {
}, { 'id': 'so44060088',
'note': 'a video that is only served as an ENCRYPTED HLS.', 'ext': 'mp4',
'url': 'https://www.nicovideo.jp/watch/so38016254', 'title': '松田的超英雄電波。《仮面ライダーガッチャード 放送終了記念特別番組》',
'only_matching': True, 'availability': 'subscriber_only',
'channel': 'あみあみチャンネル',
'channel_id': 'ch2638921',
'comment_count': int,
'description': 'md5:9dec5bb9a172b6d20a255ecb64fbd03e',
'display_id': 'so44060088',
'duration': 1881,
'genres': ['ラジオ'],
'like_count': int,
'tags': 'mincount:7',
'thumbnail': r're:https?://img\.cdn\.nimg\.jp/s/nicovideo/thumbnails/.+',
'timestamp': 1725361200,
'upload_date': '20240903',
'uploader': 'あみあみチャンネル',
'uploader_id': 'ch2638921',
'view_count': int,
},
'params': {'skip_download': 'm3u8'},
'skip': 'Channel members only; specified continuous membership period required',
}] }]
_VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)' def _extract_formats(self, api_data, video_id):
def _yield_dms_formats(self, api_data, video_id):
fmt_filter = lambda _, v: v['isAvailable'] and v['id'] fmt_filter = lambda _, v: v['isAvailable'] and v['id']
videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter)) videos = traverse_obj(api_data, ('media', 'domand', 'videos', fmt_filter))
audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter)) audios = traverse_obj(api_data, ('media', 'domand', 'audios', fmt_filter))
@ -247,164 +372,135 @@ class NiconicoIE(NiconicoBaseIE):
if not all((videos, audios, access_key, track_id)): if not all((videos, audios, access_key, track_id)):
return return
dms_m3u8_url = self._download_json( m3u8_url = self._download_json(
f'https://nvapi.nicovideo.jp/v1/watch/{video_id}/access-rights/hls', video_id, f'{self._API_BASE}/v1/watch/{video_id}/access-rights/hls',
data=json.dumps({ video_id, headers={
'Accept': 'application/json;charset=utf-8',
'Content-Type': 'application/json',
'X-Access-Right-Key': access_key,
'X-Request-With': self._BASE_URL,
**self._HEADERS,
}, query={
'actionTrackId': track_id,
}, data=json.dumps({
'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))), 'outputs': list(itertools.product((v['id'] for v in videos), (a['id'] for a in audios))),
}).encode(), query={'actionTrackId': track_id}, headers={ }).encode(),
'x-access-right-key': access_key, )['data']['contentUrl']
'x-frontend-id': 6, raw_fmts = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
'x-frontend-version': 0,
'x-request-with': 'https://www.nicovideo.jp', formats = []
})['data']['contentUrl'] for a_fmt in traverse_obj(raw_fmts, lambda _, v: v['vcodec'] == 'none'):
# Getting all audio formats results in duplicate video formats which we filter out later formats.append({
dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4') **a_fmt,
**traverse_obj(audios, (lambda _, v: a_fmt['format_id'].startswith(v['id']), {
# m3u8 extraction does not provide audio bitrates, so extract from the API data and fix
for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'):
yield {
**audio_fmt,
**traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
'format_id': ('id', {str}),
'abr': ('bitRate', {float_or_none(scale=1000)}), 'abr': ('bitRate', {float_or_none(scale=1000)}),
'asr': ('samplingRate', {int_or_none}), 'asr': ('samplingRate', {int_or_none}),
'format_id': ('id', {str}),
'quality': ('qualityLevel', {int_or_none}), 'quality': ('qualityLevel', {int_or_none}),
}), get_all=False), }, any)),
'acodec': 'aac', 'acodec': 'aac',
} })
# Sort before removing dupes to keep the format dicts with the lowest tbr # Sort first, keeping the lowest-tbr formats
video_fmts = sorted((fmt for fmt in dms_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr']) v_fmts = sorted((fmt for fmt in raw_fmts if fmt['vcodec'] != 'none'), key=lambda f: f['tbr'])
self._remove_duplicate_formats(video_fmts) self._remove_duplicate_formats(v_fmts)
# Calculate the true vbr/tbr by subtracting the lowest abr # Calculate the true vbr/tbr by subtracting the lowest abr
min_abr = min(traverse_obj(audios, (..., 'bitRate', {float_or_none})), default=0) / 1000 min_abr = traverse_obj(audios, (..., 'bitRate', {float_or_none(scale=1000)}, all, {min})) or 0
for video_fmt in video_fmts: for v_fmt in v_fmts:
video_fmt['tbr'] -= min_abr v_fmt['format_id'] = url_basename(v_fmt['url']).rpartition('.')[0]
video_fmt['format_id'] = url_basename(video_fmt['url']).rpartition('.')[0] v_fmt['quality'] = traverse_obj(videos, (
video_fmt['quality'] = traverse_obj(videos, ( lambda _, v: v['id'] == v_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1
lambda _, v: v['id'] == video_fmt['format_id'], 'qualityLevel', {int_or_none}, any)) or -1 v_fmt['tbr'] -= min_abr
yield video_fmt formats.extend(v_fmts)
def _extract_server_response(self, webpage, video_id, fatal=True): return formats
try:
return traverse_obj(
self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id),
('data', 'response', {dict}, {require('server response')}))
except ExtractorError:
if not fatal:
return {}
raise
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
try: path = 'v3' if self.is_logged_in else 'v3_guest'
webpage, handle = self._download_webpage_handle( api_resp = self._download_json(
f'https://www.nicovideo.jp/watch/{video_id}', video_id, f'{self._BASE_URL}/api/watch/{path}/{video_id}', video_id,
headers=self.geo_verification_headers()) 'Downloading API JSON', 'Unable to fetch data', headers={
if video_id.startswith('so'): **self._HEADERS,
video_id = self._match_id(handle.url) **self.geo_verification_headers(),
}, query={
api_data = self._extract_server_response(webpage, video_id) 'actionTrackId': f'AAAAAAAAAA_{round(time_seconds() * 1000)}',
except ExtractorError as e: }, expected_status=[400, 404])
try:
api_data = self._download_json( api_data = api_resp['data']
f'https://www.nicovideo.jp/api/watch/v3/{video_id}', video_id, scheduled_time = traverse_obj(api_data, ('publishScheduledAt', {str}))
'Downloading API JSON', 'Unable to fetch data', query={ status = traverse_obj(api_resp, ('meta', 'status', {int}))
'_frontendId': '6',
'_frontendVersion': '0', if status != 200:
'actionTrackId': f'AAAAAAAAAA_{round(time.time() * 1000)}', err_code = traverse_obj(api_resp, ('meta', 'errorCode', {str.upper}))
}, headers=self.geo_verification_headers())['data'] reason_code = traverse_obj(api_data, ('reasonCode', {str_or_none}))
except ExtractorError: err_msg = traverse_obj(self._ERROR_MAP, (err_code, (reason_code, 'DEFAULT'), {str}, any))
if not isinstance(e.cause, HTTPError):
# Raise if original exception was from _parse_json or utils.traversal.require if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'):
raise self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
# The webpage server response has more detailed error info than the API response elif reason_code == 'HARMFUL_VIDEO' and traverse_obj(api_data, (
webpage = e.cause.response.read().decode('utf-8', 'replace') 'viewer', 'allowSensitiveContents', {bool},
reason_code = self._extract_server_response( )) is False:
webpage, video_id, fatal=False).get('reasonCode') err_msg = 'Sensitive content, adjust display settings to watch'
if not reason_code: elif reason_code == 'HIDDEN_VIDEO' and scheduled_time:
raise err_msg = f'This content is scheduled to be released at {scheduled_time}'
if reason_code in ('DOMESTIC_VIDEO', 'HIGH_RISK_COUNTRY_VIDEO'): elif reason_code in ('CHANNEL_MEMBER_ONLY', 'HARMFUL_VIDEO', 'HIDDEN_VIDEO', 'PPV_VIDEO', 'PREMIUM_ONLY'):
self.raise_geo_restricted(countries=self._GEO_COUNTRIES) self.raise_login_required(err_msg)
elif reason_code == 'HIDDEN_VIDEO':
raise ExtractorError( if err_msg:
'The viewing period of this video has expired', expected=True) raise ExtractorError(err_msg, expected=True)
elif reason_code == 'DELETED_VIDEO': if status and status >= 500:
raise ExtractorError('This video has been deleted', expected=True) raise ExtractorError('Service temporarily unavailable', expected=True)
raise ExtractorError(f'Niconico says: {reason_code}') raise ExtractorError(f'API returned error status {status}')
availability = self._availability(**(traverse_obj(api_data, ('payment', 'video', { availability = self._availability(**traverse_obj(api_data, ('payment', 'video', {
'needs_premium': ('isPremium', {bool}), 'needs_auth': (('isContinuationBenefit', 'isPpv'), {bool}, any),
'needs_subscription': ('isAdmission', {bool}), 'needs_subscription': ('isAdmission', {bool}),
})) or {'needs_auth': True})) 'needs_premium': ('isPremium', {bool}),
}))) or 'public'
formats = list(self._yield_dms_formats(api_data, video_id))
if not formats:
fail_msg = clean_html(self._html_search_regex(
r'<p[^>]+\bclass="fail-message"[^>]*>(?P<msg>.+?)</p>',
webpage, 'fail message', default=None, group='msg'))
if fail_msg:
self.to_screen(f'Niconico said: {fail_msg}')
if fail_msg and 'された地域と同じ地域からのみ視聴できます。' in fail_msg:
availability = None
self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True)
elif availability == 'premium_only':
self.raise_login_required('This video requires premium', metadata_available=True)
elif availability == 'subscriber_only':
self.raise_login_required('This video is for members only', metadata_available=True)
elif availability == 'needs_auth':
self.raise_login_required(metadata_available=False)
# Start extracting information
tags = None
if webpage:
# use og:video:tag (not logged in)
og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
if not tags:
# use keywords and split with comma (not logged in)
kwds = self._html_search_meta('keywords', webpage, default=None)
if kwds:
tags = [x for x in kwds.split(',') if x]
if not tags:
# find in json (logged in)
tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp']) formats = self._extract_formats(api_data, video_id)
err_msg = self._STATUS_MAP.get(availability)
if not formats and err_msg:
self.raise_login_required(err_msg, metadata_available=True)
def get_video_info(*items, get_first=True, **kwargs): thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
return { return {
'id': video_id,
'_api_data': api_data,
'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
'formats': formats,
'availability': availability, 'availability': availability,
'display_id': video_id,
'formats': formats,
'genres': traverse_obj(api_data, ('genre', 'label', {str}, filter, all, filter)),
'release_timestamp': parse_iso8601(scheduled_time),
'subtitles': self.extract_subtitles(video_id, api_data),
'tags': traverse_obj(api_data, ('tag', 'items', ..., 'name', {str}, filter, all, filter)),
'thumbnails': [{ 'thumbnails': [{
'id': key,
'url': url,
'ext': 'jpg', 'ext': 'jpg',
'id': key,
'preference': thumb_prefs(key), 'preference': thumb_prefs(key),
'url': url,
**parse_resolution(url, lenient=True), **parse_resolution(url, lenient=True),
} for key, url in (get_video_info('thumbnail') or {}).items() if url], } for key, url in traverse_obj(api_data, (
'description': clean_html(get_video_info('description')), 'video', 'thumbnail', {dict}), default={}).items()],
'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')), **traverse_obj(api_data, (('channel', 'owner'), any, {
'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))), 'channel': (('name', 'nickname'), {str}, any),
'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601( 'channel_id': ('id', {str_or_none}),
self._html_search_meta('video:release_date', webpage, 'date published', default=None)), 'uploader': (('name', 'nickname'), {str}, any),
'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')), 'uploader_id': ('id', {str_or_none}),
'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')), })),
'view_count': int_or_none(get_video_info('count', 'view')), **traverse_obj(api_data, ('video', {
'tags': tags, 'id': ('id', {str_or_none}),
'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')), 'title': ('title', {str}),
'comment_count': get_video_info('count', 'comment', expected_type=int), 'description': ('description', {clean_html}, filter),
'duration': ( 'duration': ('duration', {int_or_none}),
parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None)) 'timestamp': ('registeredAt', {parse_iso8601}),
or get_video_info('duration')), })),
'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}', **traverse_obj(api_data, ('video', 'count', {
'subtitles': self.extract_subtitles(video_id, api_data), 'comment_count': ('comment', {int_or_none}),
'like_count': ('like', {int_or_none}),
'view_count': ('view', {int_or_none}),
})),
} }
def _get_subtitles(self, video_id, api_data): def _get_subtitles(self, video_id, api_data):
@ -413,21 +509,19 @@ class NiconicoIE(NiconicoBaseIE):
return return
danmaku = traverse_obj(self._download_json( danmaku = traverse_obj(self._download_json(
f'{comments_info["server"]}/v1/threads', video_id, data=json.dumps({ f'{comments_info["server"]}/v1/threads', video_id,
'Downloading comments', 'Failed to download comments', headers={
'Content-Type': 'text/plain;charset=UTF-8',
'Origin': self._BASE_URL,
'Referer': f'{self._BASE_URL}/',
'X-Client-Os-Type': 'others',
**self._HEADERS,
}, data=json.dumps({
'additionals': {}, 'additionals': {},
'params': comments_info.get('params'), 'params': comments_info.get('params'),
'threadKey': comments_info.get('threadKey'), 'threadKey': comments_info.get('threadKey'),
}).encode(), fatal=False, }).encode(), fatal=False,
headers={ ), ('data', 'threads', ..., 'comments', ...))
'Referer': 'https://www.nicovideo.jp/',
'Origin': 'https://www.nicovideo.jp',
'Content-Type': 'text/plain;charset=UTF-8',
'x-client-os-type': 'others',
'x-frontend-id': '6',
'x-frontend-version': '0',
},
note='Downloading comments', errnote='Failed to download comments'),
('data', 'threads', ..., 'comments', ...))
return { return {
'comments': [{ 'comments': [{

Loading…
Cancel
Save