mirror of https://github.com/yt-dlp/yt-dlp
Update to ytdl-commit-be008e6 (#8836)
- [utils] Make restricted filenames ignore some Unicode categories (by dirkf)
- [ie/telewebion] Fix extraction (by Grub4K)
- [ie/imgur] Overhaul extractor (by bashonly, Grub4K)
- [ie/EpidemicSound] Add extractor (by Grub4K)

Authored by: bashonly, dirkf, Grub4K
Co-authored-by: bashonly <bashonly@protonmail.com>

pull/8885/head
parent
c39358a54b
commit
65de7d204c
@ -0,0 +1,107 @@
|
|||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
orderedSet,
|
||||||
|
parse_iso8601,
|
||||||
|
parse_qs,
|
||||||
|
parse_resolution,
|
||||||
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class EpidemicSoundIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?epidemicsound\.com/track/(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.epidemicsound.com/track/yFfQVRpSPz/',
|
||||||
|
'md5': 'd98ff2ddb49e8acab9716541cbc9dfac',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '45014',
|
||||||
|
'display_id': 'yFfQVRpSPz',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Door Knock Door 1',
|
||||||
|
'alt_title': 'Door Knock Door 1',
|
||||||
|
'tags': ['foley', 'door', 'knock', 'glass', 'window', 'glass door knock'],
|
||||||
|
'categories': ['Misc. Door'],
|
||||||
|
'duration': 1,
|
||||||
|
'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/default-sfx/3000x3000.jpg',
|
||||||
|
'timestamp': 1415320353,
|
||||||
|
'upload_date': '20141107',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.epidemicsound.com/track/mj8GTTwsZd/',
|
||||||
|
'md5': 'c82b745890f9baf18dc2f8d568ee3830',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '148700',
|
||||||
|
'display_id': 'mj8GTTwsZd',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Noplace',
|
||||||
|
'tags': ['liquid drum n bass', 'energetic'],
|
||||||
|
'categories': ['drum and bass'],
|
||||||
|
'duration': 237,
|
||||||
|
'timestamp': 1694426482,
|
||||||
|
'thumbnail': 'https://cdn.epidemicsound.com/curation-assets/commercial-release-cover-images/11138/3000x3000.jpg',
|
||||||
|
'upload_date': '20230911',
|
||||||
|
'release_timestamp': 1700535606,
|
||||||
|
'release_date': '20231121',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _epidemic_parse_thumbnail(url: str):
    """Build a thumbnail dict for *url*, or return None when it is not a valid URL.

    The result always carries ``url``. ``width`` and ``height`` are read from
    the first value of the URL's ``width``/``height`` query parameters; when
    that yields nothing, the result of ``parse_resolution(url)`` is merged in
    instead.
    """
    if url_or_none(url):
        # Query-string dimensions win; parse_resolution() is only consulted
        # when the query string produced an empty mapping.
        dimensions = traverse_obj(url, ({parse_qs}, {
            'width': ('width', 0, {int_or_none}),
            'height': ('height', 0, {int_or_none}),
        })) or parse_resolution(url)
        return {'url': url, **dimensions}

    return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _epidemic_fmt_or_none(f):
    """Normalise one format dict in place, or reject it.

    ``format`` and ``format_id`` are filled from each other when only one of
    them is present. The entry is rejected (``None`` is returned) when it has
    no usable ``url`` or no usable ``format``. Otherwise:

    * a present ``format_note`` is rewritten to ``'track ID <note>'``;
    * every format other than ``'full'`` gets ``preference = -2``.

    Args:
        f: mutable format dict; it is modified in place.

    Returns:
        The same dict ``f`` after normalisation, or ``None`` if rejected.
    """
    if not f.get('format'):
        f['format'] = f.get('format_id')
    elif not f.get('format_id'):
        f['format_id'] = f['format']

    # Read `url` with .get(): a dict that lacks the key entirely must be
    # rejected the same way as one with an empty URL, not raise KeyError.
    # `format` is always set by the branch above, so indexing it is safe.
    if not f.get('url') or not f['format']:
        return None

    if f.get('format_note'):
        f['format_note'] = f'track ID {f["format_note"]}'
    if f['format'] != 'full':
        f['preference'] = -2
    return f
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Extract one Epidemic Sound track.

    Downloads the track's JSON document from
    ``https://www.epidemicsound.com/json/track/<id>`` and maps its fields
    onto an info dict (metadata, thumbnails and one format per stem).
    """
    video_id = self._match_id(url)
    track = self._download_json(
        f'https://www.epidemicsound.com/json/track/{video_id}', video_id)

    # Candidate cover URLs: the direct `imageUrl`/`cover` fields first, then
    # every entry of `coverArt.sizes` appended to `coverArt.baseUrl`.
    cover_urls = traverse_obj(track, [('imageUrl', 'cover')])
    cover_base = traverse_obj(track, ('coverArt', 'baseUrl', {url_or_none}))
    if cover_base:
        sized_covers = traverse_obj(track, (
            'coverArt', 'sizes', ..., {cover_base.__add__}))
        cover_urls.extend(sized_covers)

    field_map = {
        'id': ('id', {str_or_none}),
        'display_id': ('publicSlug', {str}),
        'title': ('title', {str}),
        'alt_title': ('oldTitle', {str}),
        'duration': ('length', {float_or_none}),
        'timestamp': ('added', {parse_iso8601}),
        'release_timestamp': ('releaseDate', {parse_iso8601}),
        'categories': ('genres', ..., 'tag', {str}),
        'tags': ('metadataTags', ..., {str}),
        'age_limit': ('isExplicit', {lambda b: 18 if b else None}),
        # Ignores the traversed object: feeds in the URLs collected above,
        # de-duplicates them and turns each into a thumbnail dict.
        'thumbnails': ({lambda _: cover_urls}, {orderedSet}, ..., {self._epidemic_parse_thumbnail}),
        # `stems` is walked as (key, value) pairs, so index 0 is the stem's
        # key and index 1 is the stem's own dict.
        'formats': ('stems', {dict.items}, ..., {
            'format': (0, {str_or_none}),
            'format_note': (1, 's3TrackId', {str_or_none}),
            'format_id': (1, 'stemType', {str}),
            'url': (1, 'lqMp3Url', {url_or_none}),
        }, {self._epidemic_fmt_or_none}),
    }
    return traverse_obj(track, field_map)
|
@ -1,52 +1,133 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from functools import partial
|
||||||
|
from textwrap import dedent
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import ExtractorError, format_field, int_or_none, parse_iso8601
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class TelewebionIE(InfoExtractor):
|
def _fmt_url(url):
|
||||||
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)'
|
return partial(format_field, template=url, default=None)
|
||||||
|
|
||||||
|
|
||||||
_TEST = {
|
class TelewebionIE(InfoExtractor):
|
||||||
'url': 'http://www.telewebion.com/#!/episode/1263668/',
|
_VALID_URL = r'https?://(?:www\.)?telewebion\.com/episode/(?P<id>(?:0x[a-fA-F\d]+|\d+))'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.telewebion.com/episode/0x1b3139c/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1263668',
|
'id': '0x1b3139c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'قرعه\u200cکشی لیگ قهرمانان اروپا',
|
'title': 'قرعهکشی لیگ قهرمانان اروپا',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'series': '+ فوتبال',
|
||||||
|
'series_id': '0x1b2505c',
|
||||||
|
'channel': 'شبکه 3',
|
||||||
|
'channel_id': '0x1b1a761',
|
||||||
|
'channel_url': 'https://telewebion.com/live/tv3',
|
||||||
|
'timestamp': 1425522414,
|
||||||
|
'upload_date': '20150305',
|
||||||
|
'release_timestamp': 1425517020,
|
||||||
|
'release_date': '20150305',
|
||||||
|
'duration': 420,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'tags': ['ورزشی', 'لیگ اروپا', 'اروپا'],
|
||||||
|
'thumbnail': 'https://static.telewebion.com/episodeImages/YjFhM2MxMDBkMDNiZTU0MjE5YjQ3ZDY0Mjk1ZDE0ZmUwZWU3OTE3OWRmMDAyODNhNzNkNjdmMWMzMWIyM2NmMA/default',
|
||||||
},
|
},
|
||||||
'params': {
|
'skip_download': 'm3u8',
|
||||||
# m3u8 download
|
}, {
|
||||||
'skip_download': True,
|
'url': 'https://telewebion.com/episode/162175536',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0x9aa9a30',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'کارما یعنی این !',
|
||||||
|
'series': 'پاورقی',
|
||||||
|
'series_id': '0x29a7426',
|
||||||
|
'channel': 'شبکه 2',
|
||||||
|
'channel_id': '0x1b1a719',
|
||||||
|
'channel_url': 'https://telewebion.com/live/tv2',
|
||||||
|
'timestamp': 1699979968,
|
||||||
|
'upload_date': '20231114',
|
||||||
|
'release_timestamp': 1699991638,
|
||||||
|
'release_date': '20231114',
|
||||||
|
'duration': 78,
|
||||||
|
'view_count': int,
|
||||||
|
'tags': ['کلیپ های منتخب', ' کلیپ طنز ', ' کلیپ سیاست ', 'پاورقی', 'ویژه فلسطین'],
|
||||||
|
'thumbnail': 'https://static.telewebion.com/episodeImages/871e9455-7567-49a5-9648-34c22c197f5f/default',
|
||||||
},
|
},
|
||||||
}
|
'skip_download': 'm3u8',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _call_graphql_api(
    self, operation, video_id, query,
    variables: dict[str, tuple[str, str]] | None = None,
    note='Downloading GraphQL JSON metadata',
):
    """POST a named GraphQL query to graph.telewebion.com and return its ``data``.

    Args:
        operation: operation name; sent as ``operationName`` and used as the
            name of the generated ``query`` statement.
        video_id: ID passed through to ``_download_json``.
        query: body of the query, placed inside the outer braces.
        variables: optional mapping of variable name to a
            ``(GraphQL type, value)`` pair. The types form the query's
            parameter list; the values are sent as ``variables``.
        note: progress note passed through to ``_download_json``.

    Returns:
        The ``data`` member of the JSON response.

    Raises:
        ExtractorError: if the response is empty or contains ``errors``; the
            message joins the reported error messages, if any.
    """
    declared = variables or {}

    # "($name: Type, ...)" parameter list; empty when no variables are given.
    signature = ''
    if declared:
        signature = ', '.join(f'${name}: {type_}' for name, (type_, _) in declared.items())
        signature = f'({signature})'

    payload = json.dumps({
        'operationName': operation,
        'query': f'query {operation}{signature} @cacheControl(maxAge: 60) {{{query}\n}}\n',
        'variables': {name: value for name, (_, value) in declared.items()}
    }, separators=(',', ':')).encode()

    response = self._download_json(
        'https://graph.telewebion.com/graphql', video_id, note, data=payload, headers={
            'Content-Type': 'application/json',
            'Accept': 'application/json',
        })

    if response and not traverse_obj(response, 'errors'):
        return response['data']

    error_text = ', '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
    raise ExtractorError(error_text or 'Unknown GraphQL API error')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
if not video_id.startswith('0x'):
|
||||||
|
video_id = hex(int(video_id))
|
||||||
|
|
||||||
secure_token = self._download_webpage(
|
episode_data = self._call_graphql_api('getEpisodeDetail', video_id, dedent('''
|
||||||
'http://m.s2.telewebion.com/op/op?action=getSecurityToken', video_id)
|
queryEpisode(filter: {EpisodeID: $EpisodeId}, first: 1) {
|
||||||
episode_details = self._download_json(
|
title
|
||||||
'http://m.s2.telewebion.com/op/op', video_id,
|
program {
|
||||||
query={'action': 'getEpisodeDetails', 'episode_id': video_id})
|
ProgramID
|
||||||
|
title
|
||||||
m3u8_url = 'http://m.s1.telewebion.com/smil/%s.m3u8?filepath=%s&m3u8=1&secure_token=%s' % (
|
|
||||||
video_id, episode_details['file_path'], secure_token)
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
m3u8_url, video_id, ext='mp4', m3u8_id='hls')
|
|
||||||
|
|
||||||
picture_paths = [
|
|
||||||
episode_details.get('picture_path'),
|
|
||||||
episode_details.get('large_picture_path'),
|
|
||||||
]
|
|
||||||
|
|
||||||
thumbnails = [{
|
|
||||||
'url': picture_path,
|
|
||||||
'preference': idx,
|
|
||||||
} for idx, picture_path in enumerate(picture_paths) if picture_path is not None]
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': episode_details['title'],
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'view_count': episode_details.get('view_count'),
|
|
||||||
}
|
}
|
||||||
|
image
|
||||||
|
view_count
|
||||||
|
duration
|
||||||
|
started_at
|
||||||
|
created_at
|
||||||
|
channel {
|
||||||
|
ChannelID
|
||||||
|
name
|
||||||
|
descriptor
|
||||||
|
}
|
||||||
|
tags {
|
||||||
|
name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'''), {'EpisodeId': ('[ID!]', video_id)})
|
||||||
|
|
||||||
|
info_dict = traverse_obj(episode_data, ('queryEpisode', 0, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'view_count': ('view_count', {int_or_none}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'tags': ('tags', ..., 'name', {str}),
|
||||||
|
'release_timestamp': ('started_at', {parse_iso8601}),
|
||||||
|
'timestamp': ('created_at', {parse_iso8601}),
|
||||||
|
'series': ('program', 'title', {str}),
|
||||||
|
'series_id': ('program', 'ProgramID', {str}),
|
||||||
|
'channel': ('channel', 'name', {str}),
|
||||||
|
'channel_id': ('channel', 'ChannelID', {str}),
|
||||||
|
'channel_url': ('channel', 'descriptor', {_fmt_url('https://telewebion.com/live/%s')}),
|
||||||
|
'thumbnail': ('image', {_fmt_url('https://static.telewebion.com/episodeImages/%s/default')}),
|
||||||
|
'formats': (
|
||||||
|
'channel', 'descriptor', {str},
|
||||||
|
{_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')},
|
||||||
|
{partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
|
||||||
|
}))
|
||||||
|
info_dict['id'] = video_id
|
||||||
|
return info_dict
|
||||||
|
Loading…
Reference in New Issue