[extractor/noice] Add NoicePodcast extractor (#5621)

Authored by: HobbyistDev
pull/5538/head
HobbyistDev 2 years ago committed by GitHub
parent dfc186d422
commit 28b8f57b4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1211,6 +1211,7 @@ from .nintendo import NintendoIE
from .nitter import NitterIE from .nitter import NitterIE
from .njpwworld import NJPWWorldIE from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE from .nobelprize import NobelPrizeIE
from .noice import NoicePodcastIE
from .nonktube import NonkTubeIE from .nonktube import NonkTubeIE
from .noodlemagazine import NoodleMagazineIE from .noodlemagazine import NoodleMagazineIE
from .noovo import NoovoIE from .noovo import NoovoIE

@ -0,0 +1,116 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
determine_ext,
int_or_none,
parse_iso8601,
traverse_obj,
variadic,
)
class NoicePodcastIE(InfoExtractor):
_VALID_URL = r'https?://open\.noice\.id/content/(?P<id>[a-fA-F0-9-]+)'
_TESTS = [{
'url': 'https://open.noice.id/content/7694bb04-ff0f-40fa-a60b-5b39f29584b2',
'info_dict': {
'id': '7694bb04-ff0f-40fa-a60b-5b39f29584b2',
'ext': 'm4a',
'season': 'Season 1',
'description': 'md5:58d1274e6857b6fbbecf47075885380d',
'release_date': '20221115',
'timestamp': 1668496642,
'season_number': 1,
'upload_date': '20221115',
'release_timestamp': 1668496642,
'title': 'Eps 1. Belajar dari Wishnutama: Kreatif Bukan Followers! (bersama Wishnutama)',
'modified_date': '20221121',
'categories': ['Bisnis dan Keuangan'],
'duration': 3567,
'modified_timestamp': 1669030647,
'thumbnail': 'https://images.noiceid.cc/catalog/content-1668496302560',
'channel_id': '9dab1024-5b92-4265-ae1c-63da87359832',
'like_count': int,
'channel': 'Noice Space Talks',
'comment_count': int,
'dislike_count': int,
'channel_follower_count': int,
}
}, {
'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063',
'info_dict': {
'id': '222134e4-99f2-456f-b8a2-b8be404bf063',
'ext': 'm4a',
'release_timestamp': 1653488220,
'description': 'md5:35074f6190cef52b05dd133bb2ef460e',
'upload_date': '20220525',
'timestamp': 1653460637,
'release_date': '20220525',
'thumbnail': 'https://images.noiceid.cc/catalog/content-1653460337625',
'title': 'Eps 1: Dijodohin Sama Anak Pak RT',
'modified_timestamp': 1669030647,
'season_number': 1,
'modified_date': '20221121',
'categories': ['Cerita dan Drama'],
'duration': 1830,
'season': 'Season 1',
'channel_id': '60193f6b-d24d-4b23-913b-ceed5a731e74',
'dislike_count': int,
'like_count': int,
'comment_count': int,
'channel': 'Dear Jerome',
'channel_follower_count': int,
}
}]
def _get_formats_and_subtitles(self, media_url, video_id):
formats, subtitles = [], {}
for url in variadic(media_url):
ext = determine_ext(url)
if ext == 'm3u8':
fmts, subs = self._extract_m3u8_formats_and_subtitles(url, video_id)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
else:
formats.append({
'url': url,
'ext': 'mp3',
'vcodec': 'none',
'acodec': 'mp3',
})
return formats, subtitles
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
nextjs_data = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['contentDetails']
media_url_list = traverse_obj(nextjs_data, (('rawContentUrl', 'url'), ))
formats, subtitles = self._get_formats_and_subtitles(media_url_list, display_id)
return {
'id': nextjs_data.get('id') or display_id,
'title': nextjs_data.get('title') or self._html_search_meta('og:title', webpage),
'formats': formats,
'subtitles': subtitles,
'description': (nextjs_data.get('description') or clean_html(nextjs_data.get('htmlDescription'))
or self._html_search_meta(['description', 'og:description'], webpage)),
'thumbnail': nextjs_data.get('image') or self._html_search_meta('og:image', webpage),
'timestamp': parse_iso8601(nextjs_data.get('createdAt')),
'release_timestamp': parse_iso8601(nextjs_data.get('publishedAt')),
'modified_timestamp': parse_iso8601(
nextjs_data.get('updatedAt') or self._html_search_meta('og:updated_time', webpage)),
'duration': int_or_none(nextjs_data.get('duration')),
'categories': traverse_obj(nextjs_data, ('genres', ..., 'name')),
'season': nextjs_data.get('seasonName'),
'season_number': int_or_none(nextjs_data.get('seasonNumber')),
'channel': traverse_obj(nextjs_data, ('catalog', 'title')),
'channel_id': traverse_obj(nextjs_data, ('catalog', 'id'), 'catalogId'),
**traverse_obj(nextjs_data, ('meta', 'aggregations', {
'like_count': 'likes',
'dislike_count': 'dislikes',
'comment_count': 'comments',
'channel_follower_count': 'followers',
}))
}
Loading…
Cancel
Save