[ie/ondemandkorea] Overhaul extractor (#8386)

Closes #8374
Authored by: seproDev
pull/5552/head^2
sepro 1 year ago committed by GitHub
parent 3ff494f6f4
commit 05adfd883a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1387,7 +1387,10 @@ from .oftv import (
from .oktoberfesttv import OktoberfestTVIE from .oktoberfesttv import OktoberfestTVIE
from .olympics import OlympicsReplayIE from .olympics import OlympicsReplayIE
from .on24 import On24IE from .on24 import On24IE
from .ondemandkorea import OnDemandKoreaIE from .ondemandkorea import (
OnDemandKoreaIE,
OnDemandKoreaProgramIE,
)
from .onefootball import OneFootballIE from .onefootball import OneFootballIE
from .onenewsnz import OneNewsNZIE from .onenewsnz import OneNewsNZIE
from .oneplace import OnePlacePodcastIE from .oneplace import OnePlacePodcastIE

@ -1,87 +1,167 @@
import functools
import re import re
import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
js_to_json, OnDemandPagedList,
float_or_none,
int_or_none,
join_nonempty,
parse_age_limit,
parse_qs,
unified_strdate,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class OnDemandKoreaIE(InfoExtractor): class OnDemandKoreaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html' _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/[a-z0-9-]+\?(?:[^#]+&)?contentId=(?P<id>\d+)'
_GEO_COUNTRIES = ['US', 'CA'] _GEO_COUNTRIES = ['US', 'CA']
_TESTS = [{ _TESTS = [{
'url': 'https://www.ondemandkorea.com/ask-us-anything-e351.html', 'url': 'https://www.ondemandkorea.com/player/vod/ask-us-anything?contentId=686471',
'md5': 'e2ff77255d989e3135bde0c5889fbce8',
'info_dict': { 'info_dict': {
'id': 'ask-us-anything-e351', 'id': '686471',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ask Us Anything : Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won - 09/24/2022', 'title': 'Ask Us Anything: Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
'description': 'A talk show/game show with a school theme where celebrity guests appear as “transfer students.”', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'thumbnail': r're:^https?://.*\.jpg$', 'duration': 5486.955,
'release_date': '20220924',
'series': 'Ask Us Anything',
'series_id': 11790,
'episode_number': 351,
'episode': 'Jung Sung-ho, Park Seul-gi, Kim Bo-min, Yang Seung-won',
}, },
'params': {
'skip_download': 'm3u8 download'
}
}, { }, {
'url': 'https://www.ondemandkorea.com/work-later-drink-now-e1.html', 'url': 'https://www.ondemandkorea.com/player/vod/breakup-probation-a-week?contentId=1595796',
'md5': '57266c720006962be7ff415b24775caa',
'info_dict': { 'info_dict': {
'id': 'work-later-drink-now-e1', 'id': '1595796',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Work Later, Drink Now : E01', 'title': 'Breakup Probation, A Week: E08',
'description': 'Work Later, Drink First follows three women who find solace in a glass of liquor at the end of the day. So-hee, who gets comfort from a cup of soju af', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'thumbnail': r're:^https?://.*\.png$', 'duration': 1586.0,
'subtitles': { 'release_date': '20231001',
'English': 'mincount:1', 'series': 'Breakup Probation, A Week',
}, 'series_id': 22912,
'episode_number': 8,
'episode': 'E08',
}, },
'params': { }, {
'skip_download': 'm3u8 download' 'url': 'https://www.ondemandkorea.com/player/vod/the-outlaws?contentId=369531',
} 'md5': 'fa5523b87aa1f6d74fc622a97f2b47cd',
'info_dict': {
'id': '369531',
'ext': 'mp4',
'release_date': '20220519',
'duration': 7267.0,
'title': 'The Outlaws: Main Movie',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
'age_limit': 18,
},
}, {
'url': 'https://www.ondemandkorea.com/en/player/vod/capture-the-moment-how-is-that-possible?contentId=1605006',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id, fatal=False)
data = self._download_json(
if not webpage: f'https://odkmedia.io/odx/api/v3/playback/{video_id}/', video_id, fatal=False,
# Page sometimes returns captcha page with HTTP 403 headers={'service-name': 'odk'}, query={'did': str(uuid.uuid4())}, expected_status=(403, 404))
raise ExtractorError( if not traverse_obj(data, ('result', {dict})):
'Unable to access page. You may have been blocked.', msg = traverse_obj(data, ('messages', '__default'), 'title', expected_type=str)
expected=True) raise ExtractorError(msg or 'Got empty response from playback API', expected=True)
if 'msg_block_01.png' in webpage: data = data['result']
self.raise_geo_restricted(
msg='This content is not available in your region', def try_geo_bypass(url):
countries=self._GEO_COUNTRIES) return traverse_obj(url, ({parse_qs}, 'stream_url', 0, {url_or_none})) or url
if 'This video is only available to ODK PLUS members.' in webpage: def try_upgrade_quality(url):
raise ExtractorError( mod_url = re.sub(r'_720(p?)\.m3u8', r'_1080\1.m3u8', url)
'This video is only available to ODK PLUS members.', return mod_url if mod_url != url and self._request_webpage(
expected=True) HEADRequest(mod_url), video_id, note='Checking for higher quality format',
errnote='No higher quality format found', fatal=False) else url
if 'ODK PREMIUM Members Only' in webpage:
raise ExtractorError( formats = []
'This video is only available to ODK PREMIUM members.', for m3u8_url in traverse_obj(data, (('sources', 'manifest'), ..., 'url', {url_or_none}, {try_geo_bypass})):
expected=True) formats.extend(self._extract_m3u8_formats(try_upgrade_quality(m3u8_url), video_id, fatal=False))
title = self._search_regex( subtitles = {}
r'class=["\']episode_title["\'][^>]*>([^<]+)', for track in traverse_obj(data, ('text_tracks', lambda _, v: url_or_none(v['url']))):
webpage, 'episode_title', fatal=False) or self._og_search_title(webpage) subtitles.setdefault(track.get('language', 'und'), []).append({
'url': track['url'],
jw_config = self._parse_json( 'ext': track.get('codec'),
self._search_regex(( 'name': track.get('label'),
r'(?P<options>{\s*[\'"]tracks[\'"].*?})[)\];]+$', })
r'playlist\s*=\s*\[(?P<options>.+)];?$',
r'odkPlayer\.init.*?(?P<options>{[^;]+}).*?;', def if_series(key=None):
), webpage, 'jw config', flags=re.MULTILINE | re.DOTALL, group='options'), return lambda obj: obj[key] if key and obj['kind'] == 'series' else None
video_id, transform_source=js_to_json)
info = self._parse_jwplayer_data( return {
jw_config, video_id, require_title=False, m3u8_id='hls', 'id': video_id,
base_url=url) 'title': join_nonempty(
('episode', 'program', 'title'),
info.update({ ('episode', 'title'), from_dict=data, delim=': '),
'title': title, **traverse_obj(data, {
'description': self._og_search_description(webpage), 'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}),
'thumbnail': self._og_search_thumbnail(webpage) 'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}),
}) 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}),
return info 'age_limit': ('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}),
'series': ('episode', {if_series(key='program')}, 'title'),
'series_id': ('episode', {if_series(key='program')}, 'id'),
'episode': ('episode', {if_series(key='title')}),
'episode_number': ('episode', {if_series(key='number')}, {int_or_none}),
}, get_all=False),
'formats': formats,
'subtitles': subtitles,
}
class OnDemandKoreaProgramIE(InfoExtractor):
    # Playlist extractor for a program (series) page: matches player URLs that
    # carry no query string, so they do not collide with the per-video
    # `?contentId=` URLs, and expands them into one entry per episode.
    _VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?:en/)?player/vod/(?P<id>[a-z0-9-]+)(?:$|#)'
    _GEO_COUNTRIES = ['US', 'CA']

    _TESTS = [{
        'url': 'https://www.ondemandkorea.com/player/vod/uskn-news',
        'info_dict': {
            'id': 'uskn-news',
        },
        'playlist_mincount': 755,
    }, {
        'url': 'https://www.ondemandkorea.com/en/player/vod/the-land',
        'info_dict': {
            'id': 'the-land',
        },
        'playlist_count': 52,
    }]

    # Number of episodes requested from the API per page; also handed to the
    # paged list in _real_extract so both sides agree on the page size.
    _PAGE_SIZE = 100

    def _fetch_page(self, display_id, page):
        """Yield one url_result per episode found on a single API page.

        @param display_id  program slug taken from the page URL
        @param page        page index supplied by the paged list; it is bumped
                           by one before being sent to the API (presumably the
                           list counts from 0 and the API from 1 - confirm)
        """
        page += 1
        page_data = self._download_json(
            f'https://odkmedia.io/odx/api/v3/program/{display_id}/episodes/', display_id,
            headers={'service-name': 'odk'}, query={
                'page': page,
                'page_size': self._PAGE_SIZE,
            }, note=f'Downloading page {page}', expected_status=404)

        # Keep only entries that actually expose an id. An entry without one
        # would otherwise raise here or produce a `contentId=None` URL that
        # the single-video extractor's _VALID_URL cannot match. A response
        # lacking `result`/`results` simply produces no entries.
        episodes = traverse_obj(
            page_data, ('result', 'results', lambda _, v: v['id'] is not None))
        for episode in episodes:
            yield self.url_result(
                f'https://www.ondemandkorea.com/player/vod/{display_id}?contentId={episode["id"]}',
                ie=OnDemandKoreaIE, video_title=episode.get('title'))

    def _real_extract(self, url):
        """Return a playlist whose entries are fetched page by page via _fetch_page."""
        display_id = self._match_id(url)

        entries = OnDemandPagedList(functools.partial(
            self._fetch_page, display_id), self._PAGE_SIZE)

        return self.playlist_result(entries, display_id)

Loading…
Cancel
Save