[extractor/AbemaTVTitle] Implement paging (#4376)

Authored by: Lesmiscore
pull/4396/head
Lesmiscore 2 years ago committed by GitHub
parent 8ef5af1942
commit bc83b4b06c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,5 +1,6 @@
import base64 import base64
import binascii import binascii
import functools
import hashlib import hashlib
import hmac import hmac
import io import io
@ -20,11 +21,11 @@ from ..utils import (
decode_base_n, decode_base_n,
int_or_none, int_or_none,
intlist_to_bytes, intlist_to_bytes,
OnDemandPagedList,
request_to_url, request_to_url,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url_query, update_url_query,
urljoin,
) )
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862) # NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
@ -145,76 +146,14 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
class AbemaTVBaseIE(InfoExtractor): class AbemaTVBaseIE(InfoExtractor):
def _extract_breadcrumb_list(self, webpage, video_id):
for jld in re.finditer(
r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
webpage):
jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
if jsonld:
if jsonld.get('@type') != 'BreadcrumbList':
continue
trav = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
if trav:
return trav
return []
class AbemaTVIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
_NETRC_MACHINE = 'abematv'
_TESTS = [{
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
'info_dict': {
'id': '194-25_s2_p1',
'title': '第1話 「チーズケーキ」 「モーニング再び」',
'series': '異世界食堂2',
'series_number': 2,
'episode': '第1話 「チーズケーキ」 「モーニング再び」',
'episode_number': 1,
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
'info_dict': {
'id': 'E8tvAnMJ7a9a5d',
'title': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series': 'ゆるキャン△ SEASON',
'episode': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series_number': 2,
'episode_number': 1,
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
'info_dict': {
'id': 'E8tvAnMJ7a9a5d',
'title': '第5話『光射す』',
'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
'thumbnail': r're:https://hayabusa\.io/.+',
'series': '相棒',
'episode': '第5話『光射す』',
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/now-on-air/abema-anime',
'info_dict': {
'id': 'abema-anime',
# this varies
# 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
'is_live': True,
},
'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
}]
_USERTOKEN = None _USERTOKEN = None
_DEVICE_ID = None _DEVICE_ID = None
_TIMETABLE = None
_MEDIATOKEN = None _MEDIATOKEN = None
_SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe' _SECRETKEY = b'v+Gjs=25Aw5erR!J8ZuvRrCx*rGswhB&qdHd_SYerEWdU&a?3DzN9BRbp5KwY4hEmcj5#fykMjJ=AuWz5GSMY-d@H7DMEh3M@9n2G552Us$$k9cD=3TxwWe86!x#Zyhe'
def _generate_aks(self, deviceid): @classmethod
def _generate_aks(cls, deviceid):
deviceid = deviceid.encode('utf-8') deviceid = deviceid.encode('utf-8')
# add 1 hour and then drop minute and secs # add 1 hour and then drop minute and secs
ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600) ts_1hour = int((time_seconds(hours=9) // 3600 + 1) * 3600)
@ -225,7 +164,7 @@ class AbemaTVIE(AbemaTVBaseIE):
def mix_once(nonce): def mix_once(nonce):
nonlocal tmp nonlocal tmp
h = hmac.new(self._SECRETKEY, digestmod=hashlib.sha256) h = hmac.new(cls._SECRETKEY, digestmod=hashlib.sha256)
h.update(nonce) h.update(nonce)
tmp = h.digest() tmp = h.digest()
@ -238,7 +177,7 @@ class AbemaTVIE(AbemaTVBaseIE):
nonlocal tmp nonlocal tmp
mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce) mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce)
mix_once(self._SECRETKEY) mix_once(cls._SECRETKEY)
mix_tmp(time_struct.tm_mon) mix_tmp(time_struct.tm_mon)
mix_twist(deviceid) mix_twist(deviceid)
mix_tmp(time_struct.tm_mday % 5) mix_tmp(time_struct.tm_mday % 5)
@ -251,7 +190,7 @@ class AbemaTVIE(AbemaTVBaseIE):
if self._USERTOKEN: if self._USERTOKEN:
return self._USERTOKEN return self._USERTOKEN
self._DEVICE_ID = str(uuid.uuid4()) AbemaTVBaseIE._DEVICE_ID = str(uuid.uuid4())
aks = self._generate_aks(self._DEVICE_ID) aks = self._generate_aks(self._DEVICE_ID)
user_data = self._download_json( user_data = self._download_json(
'https://api.abema.io/v1/users', None, note='Authorizing', 'https://api.abema.io/v1/users', None, note='Authorizing',
@ -262,7 +201,7 @@ class AbemaTVIE(AbemaTVBaseIE):
headers={ headers={
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}) })
self._USERTOKEN = user_data['token'] AbemaTVBaseIE._USERTOKEN = user_data['token']
# don't allow adding it 2 times or more, though it's guarded # don't allow adding it 2 times or more, though it's guarded
remove_opener(self._downloader, AbemaLicenseHandler) remove_opener(self._downloader, AbemaLicenseHandler)
@ -274,7 +213,7 @@ class AbemaTVIE(AbemaTVBaseIE):
if not invalidate and self._MEDIATOKEN: if not invalidate and self._MEDIATOKEN:
return self._MEDIATOKEN return self._MEDIATOKEN
self._MEDIATOKEN = self._download_json( AbemaTVBaseIE._MEDIATOKEN = self._download_json(
'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False, 'https://api.abema.io/v1/media/token', None, note='Fetching media token' if to_show else False,
query={ query={
'osName': 'android', 'osName': 'android',
@ -284,11 +223,82 @@ class AbemaTVIE(AbemaTVBaseIE):
'appId': 'tv.abema', 'appId': 'tv.abema',
'appVersion': '3.27.1' 'appVersion': '3.27.1'
}, headers={ }, headers={
'Authorization': 'bearer ' + self._get_device_token() 'Authorization': f'bearer {self._get_device_token()}',
})['token'] })['token']
return self._MEDIATOKEN return self._MEDIATOKEN
def _call_api(self, endpoint, video_id, query=None, note='Downloading JSON metadata'):
return self._download_json(
f'https://api.abema.io/{endpoint}', video_id, query=query or {},
note=note,
headers={
'Authorization': f'bearer {self._get_device_token()}',
})
def _extract_breadcrumb_list(self, webpage, video_id):
for jld in re.finditer(
r'(?is)</span></li></ul><script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
webpage):
jsonld = self._parse_json(jld.group('json_ld'), video_id, fatal=False)
if traverse_obj(jsonld, '@type') != 'BreadcrumbList':
continue
items = traverse_obj(jsonld, ('itemListElement', ..., 'name'))
if items:
return items
return []
class AbemaTVIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/(?P<type>now-on-air|video/episode|channels/.+?/slots)/(?P<id>[^?/]+)'
_NETRC_MACHINE = 'abematv'
_TESTS = [{
'url': 'https://abema.tv/video/episode/194-25_s2_p1',
'info_dict': {
'id': '194-25_s2_p1',
'title': '第1話 「チーズケーキ」 「モーニング再び」',
'series': '異世界食堂2',
'series_number': 2,
'episode': '第1話 「チーズケーキ」 「モーニング再び」',
'episode_number': 1,
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/channels/anime-live2/slots/E8tvAnMJ7a9a5d',
'info_dict': {
'id': 'E8tvAnMJ7a9a5d',
'title': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series': 'ゆるキャン△ SEASON',
'episode': 'ゆるキャン△ SEASON 全話一挙【無料ビデオ72時間】',
'series_number': 2,
'episode_number': 1,
'description': 'md5:9c5a3172ae763278f9303922f0ea5b17',
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/video/episode/87-877_s1282_p31047',
'info_dict': {
'id': 'E8tvAnMJ7a9a5d',
'title': '第5話『光射す』',
'description': 'md5:56d4fc1b4f7769ded5f923c55bb4695d',
'thumbnail': r're:https://hayabusa\.io/.+',
'series': '相棒',
'episode': '第5話『光射す』',
},
'skip': 'expired',
}, {
'url': 'https://abema.tv/now-on-air/abema-anime',
'info_dict': {
'id': 'abema-anime',
# this varies
# 'title': '女子高生の無駄づかい 全話一挙【無料ビデオ72時間】',
'description': 'md5:55f2e61f46a17e9230802d7bcc913d5f',
'is_live': True,
},
'skip': 'Not supported until yt-dlp implements native live downloader OR AbemaTV can start a local HTTP server',
}]
_TIMETABLE = None
def _perform_login(self, username, password): def _perform_login(self, username, password):
if '@' in username: # don't strictly check if it's email address or not if '@' in username: # don't strictly check if it's email address or not
ep, method = 'user/email', 'email' ep, method = 'user/email', 'email'
@ -301,13 +311,13 @@ class AbemaTVIE(AbemaTVBaseIE):
method: username, method: username,
'password': password 'password': password
}).encode('utf-8'), headers={ }).encode('utf-8'), headers={
'Authorization': 'bearer ' + self._get_device_token(), 'Authorization': f'bearer {self._get_device_token()}',
'Origin': 'https://abema.tv', 'Origin': 'https://abema.tv',
'Referer': 'https://abema.tv/', 'Referer': 'https://abema.tv/',
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}) })
self._USERTOKEN = login_response['token'] AbemaTVBaseIE._USERTOKEN = login_response['token']
self._get_media_token(True) self._get_media_token(True)
def _real_extract(self, url): def _real_extract(self, url):
@ -442,6 +452,7 @@ class AbemaTVIE(AbemaTVBaseIE):
class AbemaTVTitleIE(AbemaTVBaseIE): class AbemaTVTitleIE(AbemaTVBaseIE):
_VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)' _VALID_URL = r'https?://abema\.tv/video/title/(?P<id>[^?/]+)'
_PAGE_SIZE = 25
_TESTS = [{ _TESTS = [{
'url': 'https://abema.tv/video/title/90-1597', 'url': 'https://abema.tv/video/title/90-1597',
@ -457,18 +468,39 @@ class AbemaTVTitleIE(AbemaTVBaseIE):
'title': '真心が届く~僕とスターのオフィス・ラブ!?~', 'title': '真心が届く~僕とスターのオフィス・ラブ!?~',
}, },
'playlist_mincount': 16, 'playlist_mincount': 16,
}, {
'url': 'https://abema.tv/video/title/25-102',
'info_dict': {
'id': '25-102',
'title': 'ソードアート・オンライン アリシゼーション',
},
'playlist_mincount': 24,
}] }]
def _real_extract(self, url): def _fetch_page(self, playlist_id, series_version, page):
video_id = self._match_id(url) programs = self._call_api(
webpage = self._download_webpage(url, video_id) f'v1/video/series/{playlist_id}/programs', playlist_id,
note=f'Downloading page {page + 1}',
query={
'seriesVersion': series_version,
'offset': str(page * self._PAGE_SIZE),
'order': 'seq',
'limit': str(self._PAGE_SIZE),
})
yield from (
self.url_result(f'https://abema.tv/video/episode/{x}')
for x in traverse_obj(programs, ('programs', ..., 'id'), default=[]))
playlist_title, breadcrumb = None, self._extract_breadcrumb_list(webpage, video_id) def _entries(self, playlist_id, series_version):
if breadcrumb: return OnDemandPagedList(
playlist_title = breadcrumb[-1] functools.partial(self._fetch_page, playlist_id, series_version),
self._PAGE_SIZE)
playlist = [ def _real_extract(self, url):
self.url_result(urljoin('https://abema.tv/', mobj.group(1))) playlist_id = self._match_id(url)
for mobj in re.finditer(r'<li\s*class=".+?EpisodeList.+?"><a\s*href="(/[^"]+?)"', webpage)] series_info = self._call_api(f'v1/video/series/{playlist_id}', playlist_id)
return self.playlist_result(playlist, playlist_title=playlist_title, playlist_id=video_id) return self.playlist_result(
self._entries(playlist_id, series_info['version']), playlist_id=playlist_id,
playlist_title=series_info.get('title'),
playlist_description=series_info.get('content'))

Loading…
Cancel
Save