|
|
@ -8,10 +8,10 @@ from ..utils import (
|
|
|
|
ExtractorError,
|
|
|
|
ExtractorError,
|
|
|
|
find_xpath_attr,
|
|
|
|
find_xpath_attr,
|
|
|
|
unified_strdate,
|
|
|
|
unified_strdate,
|
|
|
|
determine_ext,
|
|
|
|
|
|
|
|
get_element_by_id,
|
|
|
|
get_element_by_id,
|
|
|
|
get_element_by_attribute,
|
|
|
|
get_element_by_attribute,
|
|
|
|
int_or_none,
|
|
|
|
int_or_none,
|
|
|
|
|
|
|
|
qualities,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# There are different sources of video in arte.tv, the extraction process
|
|
|
|
# There are different sources of video in arte.tv, the extraction process
|
|
|
@ -102,79 +102,54 @@ class ArteTVPlus7IE(InfoExtractor):
|
|
|
|
'upload_date': unified_strdate(upload_date_str),
|
|
|
|
'upload_date': unified_strdate(upload_date_str),
|
|
|
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
|
|
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ'])
|
|
|
|
|
|
|
|
|
|
|
|
all_formats = []
|
|
|
|
formats = []
|
|
|
|
for format_id, format_dict in player_info['VSR'].items():
|
|
|
|
for format_id, format_dict in player_info['VSR'].items():
|
|
|
|
fmt = dict(format_dict)
|
|
|
|
f = dict(format_dict)
|
|
|
|
fmt['format_id'] = format_id
|
|
|
|
|
|
|
|
all_formats.append(fmt)
|
|
|
|
|
|
|
|
# Some formats use the m3u8 protocol
|
|
|
|
|
|
|
|
all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
|
|
|
|
|
|
|
|
def _match_lang(f):
|
|
|
|
|
|
|
|
if f.get('versionCode') is None:
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
# Return true if that format is in the language of the url
|
|
|
|
|
|
|
|
if lang == 'fr':
|
|
|
|
|
|
|
|
l = 'F'
|
|
|
|
|
|
|
|
elif lang == 'de':
|
|
|
|
|
|
|
|
l = 'A'
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
l = lang
|
|
|
|
|
|
|
|
regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
|
|
|
|
|
|
|
|
return any(re.match(r, f['versionCode']) for r in regexes)
|
|
|
|
|
|
|
|
# Some formats may not be in the same language as the url
|
|
|
|
|
|
|
|
# TODO: Might want not to drop videos that do not match the requested language
|
|
|
|
|
|
|
|
# but to process those formats with lower precedence
|
|
|
|
|
|
|
|
formats = filter(_match_lang, all_formats)
|
|
|
|
|
|
|
|
formats = list(formats) # in python3 filter returns an iterator
|
|
|
|
|
|
|
|
if not formats:
|
|
|
|
|
|
|
|
# Some videos are only available in the 'Originalversion'
|
|
|
|
|
|
|
|
# they aren't tagged as being in French or German
|
|
|
|
|
|
|
|
# Sometimes there are neither videos of requested lang code
|
|
|
|
|
|
|
|
# nor original version videos available
|
|
|
|
|
|
|
|
# For such cases we just take all_formats as is
|
|
|
|
|
|
|
|
formats = all_formats
|
|
|
|
|
|
|
|
if not formats:
|
|
|
|
|
|
|
|
raise ExtractorError('The formats list is empty')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
|
|
|
|
|
|
|
|
def sort_key(f):
|
|
|
|
|
|
|
|
return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
|
|
|
|
|
|
|
|
else:
|
|
|
|
|
|
|
|
def sort_key(f):
|
|
|
|
|
|
|
|
versionCode = f.get('versionCode')
|
|
|
|
versionCode = f.get('versionCode')
|
|
|
|
if versionCode is None:
|
|
|
|
|
|
|
|
versionCode = ''
|
|
|
|
langcode = {
|
|
|
|
return (
|
|
|
|
'fr': 'F',
|
|
|
|
# Sort first by quality
|
|
|
|
'de': 'A',
|
|
|
|
int(f.get('height', -1)),
|
|
|
|
}.get(lang, lang)
|
|
|
|
int(f.get('bitrate', -1)),
|
|
|
|
lang_rexs = [r'VO?%s' % langcode, r'VO?.-ST%s' % langcode]
|
|
|
|
|
|
|
|
lang_pref = (
|
|
|
|
|
|
|
|
None if versionCode is None else (
|
|
|
|
|
|
|
|
10 if any(re.match(r, versionCode) for r in lang_rexs)
|
|
|
|
|
|
|
|
else -10))
|
|
|
|
|
|
|
|
source_pref = 0
|
|
|
|
|
|
|
|
if versionCode is not None:
|
|
|
|
# The original version with subtitles has lower relevance
|
|
|
|
# The original version with subtitles has lower relevance
|
|
|
|
re.match(r'VO-ST(F|A)', versionCode) is None,
|
|
|
|
if re.match(r'VO-ST(F|A)', versionCode):
|
|
|
|
|
|
|
|
source_pref -= 10
|
|
|
|
# The version with sourds/mal subtitles has also lower relevance
|
|
|
|
# The version with sourds/mal subtitles has also lower relevance
|
|
|
|
re.match(r'VO?(F|A)-STM\1', versionCode) is None,
|
|
|
|
elif re.match(r'VO?(F|A)-STM\1', versionCode):
|
|
|
|
# Prefer http downloads over m3u8
|
|
|
|
source_pref -= 9
|
|
|
|
0 if f['url'].endswith('m3u8') else 1,
|
|
|
|
format = {
|
|
|
|
)
|
|
|
|
'format_id': format_id,
|
|
|
|
formats = sorted(formats, key=sort_key)
|
|
|
|
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
|
|
|
def _format(format_info):
|
|
|
|
'language_preference': lang_pref,
|
|
|
|
info = {
|
|
|
|
'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
|
|
|
|
'format_id': format_info['format_id'],
|
|
|
|
'width': int_or_none(f.get('width')),
|
|
|
|
'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
|
|
|
|
'height': int_or_none(f.get('height')),
|
|
|
|
'width': int_or_none(format_info.get('width')),
|
|
|
|
'tbr': int_or_none(f.get('bitrate')),
|
|
|
|
'height': int_or_none(format_info.get('height')),
|
|
|
|
'quality': qfunc(f['quality']),
|
|
|
|
'tbr': int_or_none(format_info.get('bitrate')),
|
|
|
|
'source_preference': source_pref,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if format_info['mediaType'] == 'rtmp':
|
|
|
|
|
|
|
|
info['url'] = format_info['streamer']
|
|
|
|
if f.get('mediaType') == 'rtmp':
|
|
|
|
info['play_path'] = 'mp4:' + format_info['url']
|
|
|
|
format['url'] = f['streamer']
|
|
|
|
info['ext'] = 'flv'
|
|
|
|
format['play_path'] = 'mp4:' + f['url']
|
|
|
|
|
|
|
|
format['ext'] = 'flv'
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
info['url'] = format_info['url']
|
|
|
|
format['url'] = f['url']
|
|
|
|
info['ext'] = determine_ext(info['url'])
|
|
|
|
|
|
|
|
return info
|
|
|
|
formats.append(format)
|
|
|
|
info_dict['formats'] = [_format(f) for f in formats]
|
|
|
|
|
|
|
|
|
|
|
|
self._sort_formats(formats)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
info_dict['formats'] = formats
|
|
|
|
return info_dict
|
|
|
|
return info_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|