mirror of https://github.com/yt-dlp/yt-dlp
Update to ytdl-commit-be008e6 (#8836)
- [utils] Make restricted filenames ignore some Unicode categories (by dirkf)
- [ie/telewebion] Fix extraction (by Grub4K)
- [ie/imgur] Overhaul extractor (by bashonly, Grub4K)
- [ie/EpidemicSound] Add extractor (by Grub4K)

Authored by: bashonly, dirkf, Grub4K
Co-authored-by: bashonly <bashonly@protonmail.com>

pull/8885/head
parent
c39358a54b
commit
65de7d204c
@ -0,0 +1,107 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
parse_iso8601,
|
||||
parse_qs,
|
||||
parse_resolution,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class EpidemicSoundIE(InfoExtractor):
    # Extractor for single-track pages on epidemicsound.com.  The track slug
    # captured from the URL is used to query the site's JSON track endpoint.
    _VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
        'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
        'info_dict': {
            'id': '45014',
            'display_id': 'yFfQVRpSPz',
            'ext': 'mp3',
            'title': 'Door Knock Door 1',
            'alt_title': 'Door Knock Door 1',
            'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'],
            'categories': ['Misc. Door'],
            'duration': 1,
            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
            'timestamp': 1415320353,
            'upload_date': '20141107',
        },
    }, {
        'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/',
        'md5': 'c82b745890f9baf18dc2f8d568ee3830',
        'info_dict': {
            'id': '148700',
            'display_id': 'mj8GTTwsZd',
            'ext': 'mp3',
            'title': 'Noplace',
            'tags': ['liquid drum n bass', 'energetic'],
            'categories': ['drum and bass'],
            'duration': 237,
            'timestamp': 1694426482,
            'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg',
            'upload_date': '20230911',
            'release_timestamp': 1700535606,
            'release_date': '20231121',
        },
    }]

    @staticmethod
    def _epidemic_parse_thumbnail(url: str):
        """Build a thumbnail dict for *url*, or return None if it is not a URL.

        The dict always carries 'url'.  'width' and 'height' are taken from
        the URL's query string when present there; otherwise whatever
        parse_resolution() reports for the URL is merged in.
        """
        if not url_or_none(url):
            return None

        return {
            'url': url,
            # Prefer explicit ?width=/&height= query parameters; fall back to
            # parse_resolution() when the query string yields nothing.
            **(traverse_obj(url, ({parse_qs}, {
                'width': ('width', 0, {int_or_none}),
                'height': ('height', 0, {int_or_none}),
            })) or parse_resolution(url)),
        }

    @staticmethod
    def _epidemic_fmt_or_none(f):
        """Normalise a partially-built format dict in place.

        'format' and 'format_id' back-fill each other when only one is set.
        Returns None when the dict ends up without a usable 'url' or
        'format'; otherwise returns the same (mutated) dict.
        """
        if not f.get('format'):
            f['format'] = f.get('format_id')
        elif not f.get('format_id'):
            f['format_id'] = f['format']
        # Use .get() for 'url': a dict that lacks the key must be rejected
        # with None rather than raising KeyError.
        if not f.get('url') or not f['format']:
            return None
        if f.get('format_note'):
            f['format_note'] = f'track ID {f["format_note"]}'
        # Anything other than the 'full' entry is ranked below it.
        if f['format'] != 'full':
            f['preference'] = -2
        return f

    def _real_extract(self, url):
        """Fetch the track JSON for the slug in *url* and map it to an info dict."""
        video_id = self._match_id(url)
        json_data = self._download_json(f'https://www.epidemicsound.com/json/track/{video_id}', video_id)

        # Candidate thumbnail URLs: the top-level 'imageUrl' and 'cover'
        # values, plus baseUrl + size for every entry of coverArt.sizes.
        thumbnails = traverse_obj(json_data, [('imageUrl', 'cover')])
        thumb_base_url = traverse_obj(json_data, ('coverArt', 'baseUrl', {url_or_none}))
        if thumb_base_url:
            thumbnails.extend(traverse_obj(json_data, (
                'coverArt', 'sizes', ..., {thumb_base_url.__add__})))

        return traverse_obj(json_data, {
            'id': ('id', {str_or_none}),
            'display_id': ('publicSlug', {str}),
            'title': ('title', {str}),
            'alt_title': ('oldTitle', {str}),
            'duration': ('length', {float_or_none}),
            'timestamp': ('added', {parse_iso8601}),
            'release_timestamp': ('releaseDate', {parse_iso8601}),
            'categories': ('genres', ..., 'tag', {str}),
            'tags': ('metadataTags', ..., {str}),
            'age_limit': ('isExplicit', {lambda b: 18 if b else None}),
            # The source path is ignored here; the collected URL list is
            # passed through orderedSet and each entry turned into a
            # thumbnail dict.
            'thumbnails': ({lambda _: thumbnails}, {orderedSet}, ..., {self._epidemic_parse_thumbnail}),
            # 'stems' is iterated as (name, info) pairs: index 0 is the stem
            # name, index 1 the per-stem dict.
            'formats': ('stems', {dict.items}, ..., {
                'format': (0, {str_or_none}),
                'format_note': (1, 's3TrackId', {str_or_none}),
                'format_id': (1, 'stemType', {str}),
                'url': (1, 'lqMp3Url', {url_or_none}),
            }, {self._epidemic_fmt_or_none}),
        })
|
@ -1,52 +1,133 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from functools import partial
|
||||
from textwrap import dedent
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, format_field, int_or_none, parse_iso8601
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class TelewebionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)'
|
||||
def _fmt_url(url):
    """Return a one-argument formatter bound to the template *url*.

    The result is ``format_field`` with ``template=url`` and ``default=None``
    pre-applied; in this file it is called with '%s'-style URL templates.
    """
    formatter = partial(format_field, template=url, default=None)
    return formatter
|
||||
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.telewebion.com/#!/episode/1263668/',
|
||||
class TelewebionIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/episode/(?P<id>(?:0x[a-fA-F\d]+|\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.telewebion.com/episode/0x1b3139c/',
|
||||
'info_dict': {
|
||||
'id': '1263668',
|
||||
'id': '0x1b3139c',
|
||||
'ext': 'mp4',
|
||||
'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'title': 'قرعهکشی لیگ قهرمانان اروپا',
|
||||
'series': '+ فوتبال',
|
||||
'series_id': '0x1b2505c',
|
||||
'channel': 'شبکه 3',
|
||||
'channel_id': '0x1b1a761',
|
||||
'channel_url': 'https://telewebion.com/live/tv3',
|
||||
'timestamp': 1425522414,
|
||||
'upload_date': '20150305',
|
||||
'release_timestamp': 1425517020,
|
||||
'release_date': '20150305',
|
||||
'duration': 420,
|
||||
'view_count': int,
|
||||
'tags': ['ورزشی', 'لیگ اروپا', 'اروپا'],
|
||||
'thumbnail': 'https://static.telewebion.com/episodeImages/YjFhM2MxMDBkMDNiZTU0MjE5YjQ3ZDY0Mjk1ZDE0ZmUwZWU3OTE3OWRmMDAyODNhNzNkNjdmMWMzMWIyM2NmMA/default',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'skip_download': 'm3u8',
|
||||
}, {
|
||||
'url': 'https://telewebion.com/episode/162175536',
|
||||
'info_dict': {
|
||||
'id': '0x9aa9a30',
|
||||
'ext': 'mp4',
|
||||
'title': 'کارما یعنی این !',
|
||||
'series': 'پاورقی',
|
||||
'series_id': '0x29a7426',
|
||||
'channel': 'شبکه 2',
|
||||
'channel_id': '0x1b1a719',
|
||||
'channel_url': 'https://telewebion.com/live/tv2',
|
||||
'timestamp': 1699979968,
|
||||
'upload_date': '20231114',
|
||||
'release_timestamp': 1699991638,
|
||||
'release_date': '20231114',
|
||||
'duration': 78,
|
||||
'view_count': int,
|
||||
'tags': ['کلیپ های منتخب', ' کلیپ طنز ', ' کلیپ سیاست ', 'پاورقی', 'ویژه فلسطین'],
|
||||
'thumbnail': 'https://static.telewebion.com/episodeImages/871e9455-7567-49a5-9648-34c22c197f5f/default',
|
||||
},
|
||||
}
|
||||
'skip_download': 'm3u8',
|
||||
}]
|
||||
|
||||
def _call_graphql_api(
    self, operation, video_id, query,
    variables: dict[str, tuple[str, str]] | None = None,
    note='Downloading GraphQL JSON metadata',
):
    """Run a named GraphQL query against graph.telewebion.com.

    operation -- operation name, used both as 'operationName' and in the
                 generated ``query <operation>(...)`` header
    video_id  -- passed through to _download_json
    query     -- selection body placed inside the operation's braces
    variables -- optional mapping of variable name to a
                 (GraphQL type, value) pair
    note      -- passed through to _download_json

    Returns the 'data' member of the JSON response.  Raises ExtractorError
    when the response is empty or carries an 'errors' member.
    """
    declared = variables or {}

    # "($name: Type, ...)" header; empty when no variables are supplied.
    signature = ', '.join(f'${name}: {type_}' for name, (type_, _) in declared.items())
    parameters = f'({signature})' if declared else ''

    payload = {
        'operationName': operation,
        'query': f'query {operation}{parameters} @cacheControl(maxAge: 60) {{{query}\n}}\n',
        'variables': {name: value for name, (_, value) in declared.items()},
    }
    request_headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    result = self._download_json(
        'https://graph.telewebion.com/graphql', video_id, note,
        data=json.dumps(payload, separators=(',', ':')).encode(),
        headers=request_headers)

    # Success path: a non-empty response without an 'errors' member.
    if result and not traverse_obj(result, 'errors'):
        return result['data']

    # Collect every string error message the API supplied, if any.
    message = ', '.join(traverse_obj(result, ('errors', ..., 'message', {str})))
    raise ExtractorError(message or 'Unknown GraphQL API error')
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
if not video_id.startswith('0x'):
|
||||
video_id = hex(int(video_id))
|
||||
|
||||
secure_token = self._download_webpage(
|
||||
'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id)
|
||||
episode_details = self._download_json(
|
||||
'http://m.s2.telewebion.com/op/op', video_id,
|
||||
query={'action': 'getEpisodeDetails', 'episode_id': video_id})
|
||||
|
||||
m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % (
|
||||
video_id, episode_details['file_path'], secure_token)
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', m3u8_id='hls')
|
||||
|
||||
picture_paths = [
|
||||
episode_details.get('picture_path'),
|
||||
episode_details.get('large_picture_path'),
|
||||
]
|
||||
|
||||
thumbnails = [{
|
||||
'url': picture_path,
|
||||
'preference': idx,
|
||||
} for idx, picture_path in enumerate(picture_paths) if picture_path is not None]
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': episode_details['title'],
|
||||
'formats': formats,
|
||||
'thumbnails': thumbnails,
|
||||
'view_count': episode_details.get('view_count'),
|
||||
episode_data = self._call_graphql_api('getEpisodeDetail', video_id, dedent('''
|
||||
queryEpisode(filter: {EpisodeID: $EpisodeId}, first: 1) {
|
||||
title
|
||||
program {
|
||||
ProgramID
|
||||
title
|
||||
}
|
||||
image
|
||||
view_count
|
||||
duration
|
||||
started_at
|
||||
created_at
|
||||
channel {
|
||||
ChannelID
|
||||
name
|
||||
descriptor
|
||||
}
|
||||
tags {
|
||||
name
|
||||
}
|
||||
}
|
||||
'''), {'EpisodeId': ('[ID!]', video_id)})
|
||||
|
||||
info_dict = traverse_obj(episode_data, ('queryEpisode', 0, {
|
||||
'title': ('title', {str}),
|
||||
'view_count': ('view_count', {int_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'tags': ('tags', ..., 'name', {str}),
|
||||
'release_timestamp': ('started_at', {parse_iso8601}),
|
||||
'timestamp': ('created_at', {parse_iso8601}),
|
||||
'series': ('program', 'title', {str}),
|
||||
'series_id': ('program', 'ProgramID', {str}),
|
||||
'channel': ('channel', 'name', {str}),
|
||||
'channel_id': ('channel', 'ChannelID', {str}),
|
||||
'channel_url': ('channel', 'descriptor', {_fmt_url('https://telewebion.com/live/%s')}),
|
||||
'thumbnail': ('image', {_fmt_url('https://static.telewebion.com/episodeImages/%s/default')}),
|
||||
'formats': (
|
||||
'channel', 'descriptor', {str},
|
||||
{_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')},
|
||||
{partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
|
||||
}))
|
||||
info_dict['id'] = video_id
|
||||
return info_dict
|
||||
|
Loading…
Reference in New Issue