Merge branch 'FliegendeWurst-3sat-zdf-merger-bugfix-feature'

pull/2/head
Tom-Oliver Heidel 4 years ago
commit 4cd6add62b

@ -1,193 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
xpath_text,
determine_ext,
float_or_none,
ExtractorError,
)
class DreiSatIE(InfoExtractor):
IE_NAME = '3sat'
_GEO_COUNTRIES = ['DE']
_VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
_TESTS = [
{
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
'md5': 'be37228896d30a88f315b638900a026e',
'info_dict': {
'id': '45918',
'ext': 'mp4',
'title': 'Waidmannsheil',
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
'uploader': 'SCHWEIZWEIT',
'uploader_id': '100000210',
'upload_date': '20140913'
},
'params': {
'skip_download': True, # m3u8 downloads
}
},
{
'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
'only_matching': True,
},
]
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
param_groups = {}
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
group_id = param_group.get(self._xpath_ns(
'id', 'http://www.w3.org/XML/1998/namespace'))
params = {}
for param in param_group:
params[param.get('name')] = param.get('value')
param_groups[group_id] = params
formats = []
for video in smil.findall(self._xpath_ns('.//video', namespace)):
src = video.get('src')
if not src:
continue
bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
group_id = video.get('paramGroup')
param_group = param_groups[group_id]
for proto in param_group['protocols'].split(','):
formats.append({
'url': '%s://%s' % (proto, param_group['host']),
'app': param_group['app'],
'play_path': src,
'ext': 'flv',
'format_id': '%s-%d' % (proto, bitrate),
'tbr': bitrate,
})
self._sort_formats(formats)
return formats
def extract_from_xml_url(self, video_id, xml_url):
doc = self._download_xml(
xml_url, video_id,
note='Downloading video info',
errnote='Failed to download video info')
status_code = xpath_text(doc, './status/statuscode')
if status_code and status_code != 'ok':
if status_code == 'notVisibleAnymore':
message = 'Video %s is not available' % video_id
else:
message = '%s returned error: %s' % (self.IE_NAME, status_code)
raise ExtractorError(message, expected=True)
title = xpath_text(doc, './/information/title', 'title', True)
urls = []
formats = []
for fnode in doc.findall('.//formitaeten/formitaet'):
video_url = xpath_text(fnode, 'url')
if not video_url or video_url in urls:
continue
urls.append(video_url)
is_available = 'http://www.metafilegenerator' not in video_url
geoloced = 'static_geoloced_online' in video_url
if not is_available or geoloced:
continue
format_id = fnode.attrib['basetype']
format_m = re.match(r'''(?x)
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
''', format_id)
ext = determine_ext(video_url, None) or format_m.group('container')
if ext == 'meta':
continue
elif ext == 'smil':
formats.extend(self._extract_smil_formats(
video_url, video_id, fatal=False))
elif ext == 'm3u8':
# the certificates are misconfigured (see
# https://github.com/ytdl-org/youtube-dl/issues/8665)
if video_url.startswith('https://'):
continue
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', 'm3u8_native',
m3u8_id=format_id, fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
video_url, video_id, f4m_id=format_id, fatal=False))
else:
quality = xpath_text(fnode, './quality')
if quality:
format_id += '-' + quality
abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
tbr = int_or_none(self._search_regex(
r'_(\d+)k', video_url, 'bitrate', None))
if tbr and vbr and not abr:
abr = tbr - vbr
formats.append({
'format_id': format_id,
'url': video_url,
'ext': ext,
'acodec': format_m.group('acodec'),
'vcodec': format_m.group('vcodec'),
'abr': abr,
'vbr': vbr,
'tbr': tbr,
'width': int_or_none(xpath_text(fnode, './width')),
'height': int_or_none(xpath_text(fnode, './height')),
'filesize': int_or_none(xpath_text(fnode, './filesize')),
'protocol': format_m.group('proto').lower(),
})
geolocation = xpath_text(doc, './/details/geolocation')
if not formats and geolocation and geolocation != 'none':
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
thumbnails = []
for node in doc.findall('.//teaserimages/teaserimage'):
thumbnail_url = node.text
if not thumbnail_url:
continue
thumbnail = {
'url': thumbnail_url,
}
thumbnail_key = node.get('key')
if thumbnail_key:
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m:
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
thumbnails.append(thumbnail)
upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
return {
'id': video_id,
'title': title,
'description': xpath_text(doc, './/information/detail'),
'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
'thumbnails': thumbnails,
'uploader': xpath_text(doc, './/details/originChannelTitle'),
'uploader_id': xpath_text(doc, './/details/originChannelId'),
'upload_date': upload_date,
'formats': formats,
}
def _real_extract(self, url):
video_id = self._match_id(url)
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
return self.extract_from_xml_url(video_id, details_url)

@ -273,7 +273,6 @@ from .douyutv import (
DouyuTVIE, DouyuTVIE,
) )
from .dplay import DPlayIE from .dplay import DPlayIE
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE from .drbonanza import DRBonanzaIE
from .drtuber import DrTuberIE from .drtuber import DrTuberIE
from .drtv import ( from .drtv import (

@ -1,45 +1,52 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .dreisat import DreiSatIE from .common import InfoExtractor
from ..utils import ExtractorError
class PhoenixIE(DreiSatIE): class PhoenixIE(InfoExtractor):
IE_NAME = 'phoenix.de' IE_NAME = 'phoenix.de'
_VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/ _VALID_URL = r'''https?://(?:www\.)?phoenix.de/\D+(?P<id>\d+)\.html'''
(?:
phoenix/die_sendungen/(?:[^/]+/)?
)?
(?P<id>[0-9]+)'''
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.phoenix.de/content/884301', 'url': 'https://www.phoenix.de/sendungen/dokumentationen/unsere-welt-in-zukunft---stadt-a-1283620.html',
'md5': 'ed249f045256150c92e72dbb70eadec6', 'md5': '5e765e838aa3531c745a4f5b249ee3e3',
'info_dict': { 'info_dict': {
'id': '884301', 'id': '0OB4HFc43Ns',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Michael Krons mit Hans-Werner Sinn', 'title': 'Unsere Welt in Zukunft - Stadt',
'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', 'description': 'md5:9bfb6fd498814538f953b2dcad7ce044',
'upload_date': '20141025', 'upload_date': '20190912',
'uploader': 'Im Dialog', 'uploader': 'phoenix',
'uploader_id': 'phoenix',
} }
}, },
{ {
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815', 'url': 'https://www.phoenix.de/drohnenangriffe-in-saudi-arabien-a-1286995.html?ref=aktuelles',
'only_matching': True,
},
{
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
'only_matching': True, 'only_matching': True,
}, },
# an older page: https://www.phoenix.de/sendungen/gespraeche/phoenix-persoenlich/im-dialog-a-177727.html
# seems to not have an embedded video, even though it's uploaded on youtube: https://www.youtube.com/watch?v=4GxnoUHvOkM
] ]
def _real_extract(self, url): def extract_from_json_api(self, video_id, api_url):
video_id = self._match_id(url) doc = self._download_json(
webpage = self._download_webpage(url, video_id) api_url, video_id,
note="Downloading webpage metadata",
errnote="Failed to load webpage metadata")
internal_id = self._search_regex( for a in doc["absaetze"]:
r'<div class="phx_vod" id="phx_vod_([0-9]+)"', if a["typ"] == "video-youtube":
webpage, 'internal video ID') return {
'_type': 'url_transparent',
'id': a["id"],
'title': doc["titel"],
'url': "https://www.youtube.com/watch?v=%s" % a["id"],
'ie_key': 'Youtube',
}
raise ExtractorError("No downloadable video found", expected=True)
api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id def _real_extract(self, url):
return self.extract_from_xml_url(video_id, api_url) page_id = self._match_id(url)
api_url = 'https://www.phoenix.de/response/id/%s' % page_id
return self.extract_from_json_api(page_id, api_url)

@ -39,11 +39,23 @@ class ZDFBaseIE(InfoExtractor):
class ZDFIE(ZDFBaseIE): class ZDFIE(ZDFBaseIE):
_VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html' IE_NAME = "ZDF-3sat"
_VALID_URL = r'https?://www\.(zdf|3sat)\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
_QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh') _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
_GEO_COUNTRIES = ['DE'] _GEO_COUNTRIES = ['DE']
_TESTS = [{ _TESTS = [{
'url': 'https://www.3sat.de/wissen/wissenschaftsdoku/luxusgut-lebensraum-100.html',
'info_dict': {
'id': 'luxusgut-lebensraum-100',
'ext': 'mp4',
'title': 'Luxusgut Lebensraum',
'description': 'md5:5c09b2f45ac3bc5233d1b50fc543d061',
'duration': 2601,
'timestamp': 1566497700,
'upload_date': '20190822',
}
}, {
'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html', 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
'info_dict': { 'info_dict': {
'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100', 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',

Loading…
Cancel
Save