[philharmoniedeparis] Fix extraction and add support for pad.philharmoniedeparis.fr (closes #17705)

pull/2/head
Sergey M․ 6 years ago
parent 3c7da54c92
commit 66d106f270
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

@@ -2,31 +2,38 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
float_or_none, try_get,
int_or_none, urljoin,
parse_iso8601,
xpath_text,
) )
class PhilharmonieDeParisIE(InfoExtractor): class PhilharmonieDeParisIE(InfoExtractor):
IE_DESC = 'Philharmonie de Paris' IE_DESC = 'Philharmonie de Paris'
_VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
(?:
live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)|
pad\.philharmoniedeparis\.fr/doc/CIMU/
)
(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower',
'md5': 'a0a4b195f544645073631cbec166a2c2',
'info_dict': {
'id': '1086697',
'ext': 'mp4',
'title': 'Jazz à la Villette : Knower',
},
}, {
'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
'info_dict': { 'info_dict': {
'id': '1032066', 'id': '1032066',
'ext': 'flv', 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0',
'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
'timestamp': 1428179400,
'upload_date': '20150404',
'duration': 6592.278,
}, },
'params': { 'playlist_mincount': 2,
# rtmp download
'skip_download': True,
}
}, { }, {
'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
'only_matching': True, 'only_matching': True,
@@ -34,45 +41,60 @@ class PhilharmonieDeParisIE(InfoExtractor):
'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
'only_matching': True, 'only_matching': True,
}] }]
_LIVE_URL = 'https://live.philharmoniedeparis.fr'
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
concert = self._download_xml( config = self._download_json(
'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id, '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={
video_id).find('./concert') 'id': video_id,
'lang': 'fr-FR',
})
def extract_entry(source):
if not isinstance(source, dict):
return
title = source.get('title')
if not title:
return
files = source.get('files')
if not isinstance(files, dict):
return
format_urls = set()
formats = [] formats = []
info_dict = { for format_id in ('mobile', 'desktop'):
'id': video_id, format_url = try_get(
'title': xpath_text(concert, './titre', 'title', fatal=True), files, lambda x: x[format_id]['file'], compat_str)
if not format_url or format_url in format_urls:
continue
format_urls.add(format_url)
m3u8_url = urljoin(self._LIVE_URL, format_url)
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
if not formats:
return
self._sort_formats(formats)
return {
'title': title,
'formats': formats, 'formats': formats,
} }
fichiers = concert.find('./fichiers') thumbnail = urljoin(self._LIVE_URL, config.get('image'))
stream = fichiers.attrib['serveurstream']
for fichier in fichiers.findall('./fichier'): info = extract_entry(config)
info_dict['duration'] = float_or_none(fichier.get('timecodefin')) if info:
for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]): info.update({
format_url = fichier.get('url%s' % suffix) 'id': video_id,
if not format_url: 'thumbnail': thumbnail,
continue
formats.append({
'url': stream,
'play_path': format_url,
'ext': 'flv',
'format_id': format_id,
'width': int_or_none(concert.get('largeur%s' % suffix)),
'height': int_or_none(concert.get('hauteur%s' % suffix)),
'quality': quality,
}) })
self._sort_formats(formats) return info
date, hour = concert.get('date'), concert.get('heure') entries = []
if date and hour: for num, chapter in enumerate(config['chapters'], start=1):
info_dict['timestamp'] = parse_iso8601( entry = extract_entry(chapter)
'%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour)) entry['id'] = '%s-%d' % (video_id, num)
elif date: entries.append(entry)
info_dict['upload_date'] = date
return info_dict return self.playlist_result(entries, video_id, config.get('title'))

Loading…
Cancel
Save