|
|
@ -5,7 +5,6 @@ from ..utils import (
|
|
|
|
NO_DEFAULT,
|
|
|
|
NO_DEFAULT,
|
|
|
|
ExtractorError,
|
|
|
|
ExtractorError,
|
|
|
|
determine_ext,
|
|
|
|
determine_ext,
|
|
|
|
extract_attributes,
|
|
|
|
|
|
|
|
float_or_none,
|
|
|
|
float_or_none,
|
|
|
|
int_or_none,
|
|
|
|
int_or_none,
|
|
|
|
join_nonempty,
|
|
|
|
join_nonempty,
|
|
|
@ -25,6 +24,11 @@ class ZDFBaseIE(InfoExtractor):
|
|
|
|
_GEO_COUNTRIES = ['DE']
|
|
|
|
_GEO_COUNTRIES = ['DE']
|
|
|
|
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
|
|
|
|
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd', 'fhd', 'uhd')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _download_v2_doc(self, document_id):
|
|
|
|
|
|
|
|
return self._download_json(
|
|
|
|
|
|
|
|
f'https://zdf-prod-futura.zdf.de/mediathekV2/document/{document_id}',
|
|
|
|
|
|
|
|
document_id)
|
|
|
|
|
|
|
|
|
|
|
|
def _call_api(self, url, video_id, item, api_token=None, referrer=None):
|
|
|
|
def _call_api(self, url, video_id, item, api_token=None, referrer=None):
|
|
|
|
headers = {}
|
|
|
|
headers = {}
|
|
|
|
if api_token:
|
|
|
|
if api_token:
|
|
|
@ -320,9 +324,7 @@ class ZDFIE(ZDFBaseIE):
|
|
|
|
return self._extract_entry(player['content'], player, content, video_id)
|
|
|
|
return self._extract_entry(player['content'], player, content, video_id)
|
|
|
|
|
|
|
|
|
|
|
|
def _extract_mobile(self, video_id):
|
|
|
|
def _extract_mobile(self, video_id):
|
|
|
|
video = self._download_json(
|
|
|
|
video = self._download_v2_doc(video_id)
|
|
|
|
f'https://zdf-cdn.live.cellular.de/mediathekV2/document/{video_id}',
|
|
|
|
|
|
|
|
video_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
formats = []
|
|
|
|
formats = []
|
|
|
|
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
|
|
|
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
|
|
@ -374,7 +376,7 @@ class ZDFIE(ZDFBaseIE):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ZDFChannelIE(ZDFBaseIE):
|
|
|
|
class ZDFChannelIE(ZDFBaseIE):
|
|
|
|
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
|
|
|
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/?#]+/)*(?P<id>[^/?#&]+)'
|
|
|
|
_TESTS = [{
|
|
|
|
_TESTS = [{
|
|
|
|
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
|
|
|
|
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
|
|
|
|
'info_dict': {
|
|
|
|
'info_dict': {
|
|
|
@ -387,18 +389,19 @@ class ZDFChannelIE(ZDFBaseIE):
|
|
|
|
'info_dict': {
|
|
|
|
'info_dict': {
|
|
|
|
'id': 'planet-e',
|
|
|
|
'id': 'planet-e',
|
|
|
|
'title': 'planet e.',
|
|
|
|
'title': 'planet e.',
|
|
|
|
|
|
|
|
'description': 'md5:87e3b9c66a63cf1407ee443d2c4eb88e',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
'playlist_mincount': 50,
|
|
|
|
'playlist_mincount': 50,
|
|
|
|
}, {
|
|
|
|
}, {
|
|
|
|
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
|
|
|
|
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
|
|
|
|
'info_dict': {
|
|
|
|
'info_dict': {
|
|
|
|
'id': 'aktenzeichen-xy-ungeloest',
|
|
|
|
'id': 'aktenzeichen-xy-ungeloest',
|
|
|
|
'title': 'Aktenzeichen XY... ungelöst',
|
|
|
|
'title': 'Aktenzeichen XY... Ungelöst',
|
|
|
|
'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
|
|
|
|
'description': 'md5:623ede5819c400c6d04943fa8100e6e7',
|
|
|
|
},
|
|
|
|
},
|
|
|
|
'playlist_mincount': 2,
|
|
|
|
'playlist_mincount': 2,
|
|
|
|
}, {
|
|
|
|
}, {
|
|
|
|
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
|
|
|
'url': 'https://www.zdf.de/serien/taunuskrimi/',
|
|
|
|
'only_matching': True,
|
|
|
|
'only_matching': True,
|
|
|
|
}]
|
|
|
|
}]
|
|
|
|
|
|
|
|
|
|
|
@ -406,36 +409,41 @@ class ZDFChannelIE(ZDFBaseIE):
|
|
|
|
def suitable(cls, url):
|
|
|
|
def suitable(cls, url):
|
|
|
|
return False if ZDFIE.suitable(url) else super().suitable(url)
|
|
|
|
return False if ZDFIE.suitable(url) else super().suitable(url)
|
|
|
|
|
|
|
|
|
|
|
|
def _og_search_title(self, webpage, fatal=False):
|
|
|
|
def _extract_entry(self, entry):
|
|
|
|
title = super()._og_search_title(webpage, fatal=fatal)
|
|
|
|
return self.url_result(
|
|
|
|
return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
|
|
|
|
entry['sharingUrl'], ZDFIE, **traverse_obj(entry, {
|
|
|
|
|
|
|
|
'id': ('basename', {str}),
|
|
|
|
|
|
|
|
'title': ('titel', {str}),
|
|
|
|
|
|
|
|
'description': ('beschreibung', {str}),
|
|
|
|
|
|
|
|
'duration': ('length', {float_or_none}),
|
|
|
|
|
|
|
|
# TODO: seasonNumber and episodeNumber can be extracted but need to also be in ZDFIE
|
|
|
|
|
|
|
|
}))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _entries(self, data, document_id):
|
|
|
|
|
|
|
|
for entry in traverse_obj(data, (
|
|
|
|
|
|
|
|
'cluster', lambda _, v: v['type'] == 'teaser',
|
|
|
|
|
|
|
|
# If 'brandId' differs, it is a 'You might also like' video. Filter these out
|
|
|
|
|
|
|
|
'teaser', lambda _, v: v['type'] == 'video' and v['brandId'] == document_id and v['sharingUrl'],
|
|
|
|
|
|
|
|
)):
|
|
|
|
|
|
|
|
yield self._extract_entry(entry)
|
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
def _real_extract(self, url):
|
|
|
|
channel_id = self._match_id(url)
|
|
|
|
channel_id = self._match_id(url)
|
|
|
|
|
|
|
|
|
|
|
|
webpage = self._download_webpage(url, channel_id)
|
|
|
|
webpage = self._download_webpage(url, channel_id)
|
|
|
|
|
|
|
|
document_id = self._search_regex(
|
|
|
|
matches = re.finditer(
|
|
|
|
r'docId\s*:\s*(["\'])(?P<doc_id>(?:(?!\1).)+)\1', webpage, 'document id', group='doc_id')
|
|
|
|
rf'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>{ZDFIE._VALID_URL})\1''',
|
|
|
|
data = self._download_v2_doc(document_id)
|
|
|
|
webpage)
|
|
|
|
|
|
|
|
|
|
|
|
main_video = traverse_obj(data, (
|
|
|
|
if self._downloader.params.get('noplaylist', False):
|
|
|
|
'cluster', lambda _, v: v['type'] == 'teaserContent',
|
|
|
|
entry = next(
|
|
|
|
'teaser', lambda _, v: v['type'] == 'video' and v['basename'] and v['sharingUrl'], any)) or {}
|
|
|
|
(self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
|
|
|
|
|
|
|
|
None)
|
|
|
|
if not self._yes_playlist(channel_id, main_video.get('basename')):
|
|
|
|
self.to_screen('Downloading just the main video because of --no-playlist')
|
|
|
|
return self._extract_entry(main_video)
|
|
|
|
if entry:
|
|
|
|
|
|
|
|
return entry
|
|
|
|
return self.playlist_result(
|
|
|
|
else:
|
|
|
|
self._entries(data, document_id), channel_id,
|
|
|
|
self.to_screen(f'Downloading playlist {channel_id} - add --no-playlist to download just the main video')
|
|
|
|
re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', self._og_search_title(webpage) or '')[0] or None,
|
|
|
|
|
|
|
|
join_nonempty(
|
|
|
|
def check_video(m):
|
|
|
|
'headline', 'text', delim='\n\n',
|
|
|
|
v_ref = self._search_regex(
|
|
|
|
from_dict=traverse_obj(data, ('shortText', {dict}), default={})) or None)
|
|
|
|
r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["']){}\2[^>]*>)'''.format(m.group('p_id')),
|
|
|
|
|
|
|
|
webpage, 'check id', default='')
|
|
|
|
|
|
|
|
v_ref = extract_attributes(v_ref)
|
|
|
|
|
|
|
|
return v_ref.get('data-target-video-type') != 'novideo'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return self.playlist_from_matches(
|
|
|
|
|
|
|
|
(m.group('url') for m in matches if check_video(m)),
|
|
|
|
|
|
|
|
channel_id, self._og_search_title(webpage, fatal=False))
|
|
|
|
|
|
|
|