[Koo] Add extractor (#1044)

Authored by: Ashish0804
pull/1069/head
Ashish Gupta 3 years ago committed by GitHub
parent d1a7768432
commit 9ada988bfc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -632,6 +632,7 @@ from .kickstarter import KickStarterIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .konserthusetplay import KonserthusetPlayIE
from .koo import KooIE
from .krasview import KrasViewIE
from .ku6 import Ku6IE
from .kusi import KUSIIE

@ -0,0 +1,116 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
clean_html,
try_get,
)
class KooIE(InfoExtractor):
_VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
_TESTS = [{ # Test for video in the comments
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
'info_dict': {
'id': '946c4189-bc2d-4524-b95b-43f641e2adde',
'ext': 'mp4',
'title': 'test for video in comment',
'description': 'md5:daa77dc214add4da8b6ea7d2226776e7',
'timestamp': 1632215195,
'uploader_id': 'ytdlpTestAccount',
'uploader': 'yt-dlpTestAccount',
'duration': 7000,
'upload_date': '20210921'
},
'params': {'skip_download': True}
}, { # Test for koo with long title
'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361',
'info_dict': {
'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361',
'ext': 'mp4',
'title': 'md5:47a71c2337295330c5a19a8af1bbf450',
'description': 'md5:06a6a84e9321499486dab541693d8425',
'timestamp': 1632106884,
'uploader_id': 'laxman_kumarDBFEC',
'uploader': 'Laxman Kumar 🇮🇳',
'duration': 46000,
'upload_date': '20210920'
},
'params': {'skip_download': True}
}, { # Test for audio
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
'info_dict': {
'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602',
'ext': 'mp4',
'title': 'Test for audio',
'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8',
'timestamp': 1632211634,
'uploader_id': 'ytdlpTestAccount',
'uploader': 'yt-dlpTestAccount',
'duration': 214000,
'upload_date': '20210921'
},
'params': {'skip_download': True}
}, { # Test for video
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
'info_dict': {
'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1',
'ext': 'mp4',
'title': 'Test for video',
'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500',
'timestamp': 1632211468,
'uploader_id': 'ytdlpTestAccount',
'uploader': 'yt-dlpTestAccount',
'duration': 14000,
'upload_date': '20210921'
},
'params': {'skip_download': True}
}, { # Test for link
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a',
'skip': 'No video/audio found at the provided url.',
'info_dict': {
'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a',
'title': 'Test for link',
'ext': 'none',
},
}, { # Test for images
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
'skip': 'No video/audio found at the provided url.',
'info_dict': {
'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb',
'title': 'Test for images',
'ext': 'none',
},
}]
def _real_extract(self, url):
id = self._match_id(url)
data_json = self._download_json(f'https://www.kooapp.com/apiV1/ku/{id}?limit=20&offset=0&showSimilarKoos=true', id)['parentContent']
item_json = next(content['items'][0] for content in data_json
if try_get(content, lambda x: x['items'][0]['id']) == id)
media_json = item_json['mediaMap']
formats = []
mp4_url = media_json.get('videoMp4')
video_m3u8_url = media_json.get('videoHls')
if mp4_url:
formats.append({
'url': mp4_url,
'ext': 'mp4',
})
if video_m3u8_url:
formats.extend(self._extract_m3u8_formats(video_m3u8_url, id, fatal=False, ext='mp4'))
if not formats:
self.raise_no_formats('No video/audio found at the provided url.', expected=True)
self._sort_formats(formats)
return {
'id': id,
'title': clean_html(item_json.get('title')),
'description': f'{clean_html(item_json.get("title"))}\n\n{clean_html(item_json.get("enTransliteration"))}',
'timestamp': item_json.get('createdAt'),
'uploader_id': item_json.get('handle'),
'uploader': item_json.get('name'),
'duration': media_json.get('duration'),
'formats': formats,
}
Loading…
Cancel
Save