[extractor/kompas] Add extractor (#4562)

Authored by: HobbyistDev
pull/4575/head
HobbyistDev 2 years ago committed by GitHub
parent ad26f15a06
commit d380fc1614
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -765,6 +765,7 @@ from .kicker import KickerIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE from .kinopoisk import KinoPoiskIE
from .kompas import KompasVideoIE
from .konserthusetplay import KonserthusetPlayIE from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE from .koo import KooIE
from .kth import KTHIE from .kth import KTHIE

@ -0,0 +1,68 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
float_or_none,
traverse_obj,
try_call,
)
# Video from www.kompas.tv and video.kompas.com seems use jixie player
# see [1] https://jixie.atlassian.net/servicedesk/customer/portal/2/article/1339654214?src=-1456335525,
# [2] https://scripts.jixie.media/jxvideo.3.1.min.js for more info
class KompasVideoIE(InfoExtractor):
_VALID_URL = r'https?://video\.kompas\.com/\w+/(?P<id>\d+)/(?P<slug>[\w-]+)'
_TESTS = [{
'url': 'https://video.kompas.com/watch/164474/kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
'info_dict': {
'id': '164474',
'ext': 'mp4',
'title': 'Kim Jong Un Siap Kirim Nuklir Lawan AS dan Korsel',
'description': 'md5:262530c4fb7462398235f9a5dba92456',
'uploader_id': '9262bf2590d558736cac4fff7978fcb1',
'display_id': 'kim-jong-un-siap-kirim-nuklir-lawan-as-dan-korsel',
'duration': 85.066667,
'categories': ['news'],
'thumbnail': 'https://video.jixie.media/1001/164474/164474_1280x720.jpg',
'tags': 'count:9',
}
}]
def _real_extract(self, url):
video_id, display_id = self._match_valid_url(url).group('id', 'slug')
webpage = self._download_webpage(url, display_id)
json_data = self._download_json(
'https://apidam.jixie.io/api/public/stream', display_id,
query={'metadata': 'full', 'video_id': video_id})['data']
formats, subtitles = [], {}
for stream in json_data['streams']:
if stream.get('type') == 'HLS':
fmt, sub = self._extract_m3u8_formats_and_subtitles(stream.get('url'), display_id, ext='mp4')
formats.extend(fmt)
self._merge_subtitles(sub, target=subtitles)
else:
formats.append({
'url': stream.get('url'),
'width': stream.get('width'),
'height': stream.get('height'),
'ext': 'mp4',
})
self._sort_formats(formats)
return {
'id': video_id,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
'title': json_data.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage),
'description': (clean_html(traverse_obj(json_data, ('metadata', 'description')))
or self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage)),
'thumbnails': traverse_obj(json_data, ('metadata', 'thumbnails')),
'duration': float_or_none(traverse_obj(json_data, ('metadata', 'duration'))),
'tags': try_call(lambda: json_data['metadata']['keywords'].split(',')),
'categories': try_call(lambda: json_data['metadata']['categories'].split(',')),
'uploader_id': json_data.get('owner_id'),
}
Loading…
Cancel
Save