[azmedien] Adopt to major site redesign (closes #17745)

pull/2/head
Alexander Seiler 6 years ago committed by Sergey M․
parent 95e42d7336
commit da56fb631f
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

@ -1,19 +1,16 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .kaltura import KalturaIE from .kaltura import KalturaIE
from ..utils import (
get_element_by_class,
get_element_by_id,
strip_or_none,
urljoin,
)
class AZMedienBaseIE(InfoExtractor): class AZMedienBaseIE(InfoExtractor):
_PARTNER_ID = '1719221'
def _kaltura_video(self, partner_id, entry_id): def _kaltura_video(self, partner_id, entry_id):
return self.url_result( return self.url_result(
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(), 'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
@ -25,189 +22,74 @@ class AZMedienIE(AZMedienBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.)? (?:www\.)?
(?: (?P<host>
telezueri\.ch| telezueri\.ch|
telebaern\.tv| telebaern\.tv|
telem1\.ch telem1\.ch
)/ )/
[0-9]+-show-[^/\#]+ [^/]+/
(?: (?P<id>
/[0-9]+-episode-[^/\#]+ [^/]+-(?P<article_id>\d+)
)
(?: (?:
/[0-9]+-segment-(?:[^/\#]+\#)?| \#video=
\# (?P<kaltura_id>
)| [_0-9a-z]+
\#
) )
(?P<id>[^\#]+) )?
''' '''
_TESTS = [{ _TESTS = [{
# URL with 'segment' 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'info_dict': { 'info_dict': {
'id': '1_2444peh4', 'id': '1_anruz3wy',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', 'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
'uploader_id': 'TeleZ?ri', 'uploader_id': 'TVOnline',
'upload_date': '20161218', 'upload_date': '20180930',
'timestamp': 1482084490, 'timestamp': 1538328802,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
# URL with 'segment' and fragment: 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
'only_matching': True
}, {
# URL with 'episode' and fragment:
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
'only_matching': True
}, {
# URL with 'show' and fragment:
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
'only_matching': True 'only_matching': True
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
entry_id = mobj.group('kaltura_id')
if not entry_id:
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
api_path = self._search_regex(
partner_id = self._search_regex( r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']',
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)', webpage, 'api path')
webpage, 'kaltura partner id') api_url = 'https://www.%s%s' % (mobj.group('host'), api_path)
entry_id = self._html_search_regex( payload = {
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s' 'query': '''query VideoContext($articleId: ID!) {
% re.escape(video_id), webpage, 'kaltura entry id', group='id') article: node(id: $articleId) {
... on Article {
return self._kaltura_video(partner_id, entry_id) mainAssetRelation {
asset {
... on VideoAsset {
class AZMedienPlaylistIE(AZMedienBaseIE): kalturaId
IE_DESC = 'AZ Medien playlists' }
_VALID_URL = r'''(?x) }
https?:// }
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?P<id>[0-9]+-
(?:
show|
topic|
themen
)-[^/\#]+
(?:
/[0-9]+-episode-[^/\#]+
)?
)$
'''
_TESTS = [{
# URL with 'episode'
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
'info_dict': {
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
'title': 'News - Donnerstag, 15. Dezember 2016',
},
'playlist_count': 9,
}, {
# URL with 'themen'
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
'info_dict': {
'id': '258-themen-tele-m1-classics',
'title': 'Tele M1 Classics',
},
'playlist_mincount': 15,
}, {
# URL with 'topic', contains nested playlists
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
'only_matching': True,
}, {
# URL with 'show' only
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
'only_matching': True
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(url, show_id)
entries = []
partner_id = self._search_regex(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
webpage, 'kaltura partner id', default=None)
if partner_id:
entries = [
self._kaltura_video(partner_id, m.group('id'))
for m in re.finditer(
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
if not entries:
entries = [
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
for m in re.finditer(
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
if not entries:
entries = [
# May contain nested playlists (e.g. [1]) thus no explicit
# ie_key
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
self.url_result(urljoin(url, m.group('url')))
for m in re.finditer(
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
title = self._search_regex(
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
webpage, 'title',
default=strip_or_none(get_element_by_id(
'video-title', webpage)), group='title')
return self.playlist_result(entries, show_id, title)
class AZMedienShowPlaylistIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien show playlists'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?:
all-episodes|
alle-episoden
)/
(?P<id>[^/?#&]+)
'''
_TEST = {
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
'info_dict': {
'id': 'astrotalk',
'title': 'TeleZüri: AstroTalk - alle episoden',
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
},
'playlist_mincount': 13,
} }
}
}''',
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
}
json_data = self._download_json(
api_url, video_id, headers={
'Content-Type': 'application/json',
},
data=json.dumps(payload).encode())
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
def _real_extract(self, url): return self._kaltura_video(self._PARTNER_ID, entry_id)
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
episodes = get_element_by_class('search-mobile-box', webpage)
entries = [self.url_result(
urljoin(url, m.group('url'))) for m in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
title = self._og_search_title(webpage, fatal=False)
description = self._og_search_description(webpage)
return self.playlist_result(entries, playlist_id, title, description)

@ -88,11 +88,7 @@ from .awaan import (
AWAANLiveIE, AWAANLiveIE,
AWAANSeasonIE, AWAANSeasonIE,
) )
from .azmedien import ( from .azmedien import AZMedienIE
AZMedienIE,
AZMedienPlaylistIE,
AZMedienShowPlaylistIE,
)
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE

Loading…
Cancel
Save