[extractor] Common function `_match_valid_url`

pull/763/head
pukkandan 3 years ago
parent f79ec47d71
commit 5ad28e7ffd
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698

@ -31,8 +31,10 @@ with open('devscripts/lazy_load_template.py', 'rt') as f:
module_template = f.read() module_template = f.read()
module_contents = [ module_contents = [
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', module_template,
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n'] getsource(InfoExtractor._match_valid_url),
getsource(InfoExtractor.suitable),
'\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
ie_template = ''' ie_template = '''
class {name}({bases}): class {name}({bases}):

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .amp import AMPIE from .amp import AMPIE
from .common import InfoExtractor from .common import InfoExtractor
@ -59,7 +58,7 @@ class AbcNewsVideoIE(AMPIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
display_id = mobj.group('display_id') display_id = mobj.group('display_id')
video_id = mobj.group('id') video_id = mobj.group('id')
info_dict = self._extract_feed_info( info_dict = self._extract_feed_info(

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
@ -55,7 +54,7 @@ class ABCOTVSIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
site, display_id, video_id = re.match(self._VALID_URL, url).groups() site, display_id, video_id = self._match_valid_url(url).groups()
display_id = display_id or video_id display_id = display_id or video_id
station = self._SITE_MAP[site] station = self._SITE_MAP[site]

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -80,7 +79,7 @@ class ACastIE(ACastBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
channel, display_id = re.match(self._VALID_URL, url).groups() channel, display_id = self._match_valid_url(url).groups()
episode = self._call_api( episode = self._call_api(
'%s/episodes/%s' % (channel, display_id), '%s/episodes/%s' % (channel, display_id),
display_id, {'showInfo': 'true'}) display_id, {'showInfo': 'true'})

@ -132,7 +132,7 @@ class AdobeTVIE(AdobeTVBaseIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
language, show_urlname, urlname = re.match(self._VALID_URL, url).groups() language, show_urlname, urlname = self._match_valid_url(url).groups()
if not language: if not language:
language = 'en' language = 'en'
@ -178,7 +178,7 @@ class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
_process_data = AdobeTVBaseIE._parse_video_data _process_data = AdobeTVBaseIE._parse_video_data
def _real_extract(self, url): def _real_extract(self, url):
language, show_urlname = re.match(self._VALID_URL, url).groups() language, show_urlname = self._match_valid_url(url).groups()
if not language: if not language:
language = 'en' language = 'en'
query = { query = {
@ -215,7 +215,7 @@ class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id'))) show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
def _real_extract(self, url): def _real_extract(self, url):
language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups() language, channel_urlname, category_urlname = self._match_valid_url(url).groups()
if not language: if not language:
language = 'en' language = 'en'
query = { query = {

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .turner import TurnerBaseIE from .turner import TurnerBaseIE
from ..utils import ( from ..utils import (
@ -89,7 +88,7 @@ class AdultSwimIE(TurnerBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
show_path, episode_path = re.match(self._VALID_URL, url).groups() show_path, episode_path = self._match_valid_url(url).groups()
display_id = episode_path or show_path display_id = episode_path or show_path
query = '''query { query = '''query {
getShowBySlug(slug:"%s") { getShowBySlug(slug:"%s") {

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from ..utils import ( from ..utils import (
@ -170,7 +169,7 @@ class AENetworksIE(AENetworksBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
domain, canonical = re.match(self._VALID_URL, url).groups() domain, canonical = self._match_valid_url(url).groups()
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url) return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
@ -187,7 +186,7 @@ class AENetworksListBaseIE(AENetworksBaseIE):
}))['data'][resource] }))['data'][resource]
def _real_extract(self, url): def _real_extract(self, url):
domain, slug = re.match(self._VALID_URL, url).groups() domain, slug = self._match_valid_url(url).groups()
_, brand = self._DOMAIN_MAP[domain] _, brand = self._DOMAIN_MAP[domain]
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS) playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
base_url = 'http://watch.%s' % domain base_url = 'http://watch.%s' % domain
@ -309,7 +308,7 @@ class HistoryPlayerIE(AENetworksBaseIE):
_TESTS = [] _TESTS = []
def _real_extract(self, url): def _real_extract(self, url):
domain, video_id = re.match(self._VALID_URL, url).groups() domain, video_id = self._match_valid_url(url).groups()
return self._extract_aetn_info(domain, 'id', video_id, url) return self._extract_aetn_info(domain, 'id', video_id, url)

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -32,7 +31,7 @@ class AlJazeeraIE(InfoExtractor):
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url): def _real_extract(self, url):
post_type, name = re.match(self._VALID_URL, url).groups() post_type, name = self._match_valid_url(url).groups()
post_type = { post_type = {
'features': 'post', 'features': 'post',
'program': 'episode', 'program': 'episode',

@ -42,8 +42,7 @@ class AluraIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) course, video_id = self._match_valid_url(url).group('course_name', 'id')
course = self._search_regex(self._VALID_URL, url, 'post url', group='course_name')
video_url = self._VIDEO_URL % (course, video_id) video_url = self._VIDEO_URL % (course, video_id)
video_dict = self._download_json(video_url, video_id, 'Searching for videos') video_dict = self._download_json(video_url, video_id, 'Searching for videos')

@ -63,7 +63,7 @@ class AMCNetworksIE(ThePlatformIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
site, display_id = re.match(self._VALID_URL, url).groups() site, display_id = self._match_valid_url(url).groups()
requestor_id = self._REQUESTOR_ID_MAP[site] requestor_id = self._REQUESTOR_ID_MAP[site]
page_data = self._download_json( page_data = self._download_json(
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -69,7 +68,7 @@ class AmericasTestKitchenIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
resource_type, video_id = re.match(self._VALID_URL, url).groups() resource_type, video_id = self._match_valid_url(url).groups()
is_episode = resource_type == 'episode' is_episode = resource_type == 'episode'
if is_episode: if is_episode:
resource_type = 'episodes' resource_type = 'episodes'
@ -114,7 +113,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
show_name, season_number = re.match(self._VALID_URL, url).groups() show_name, season_number = self._match_valid_url(url).groups()
season_number = int(season_number) season_number = int(season_number)
slug = 'atk' if show_name == 'americastestkitchen' else 'cco' slug = 'atk' if show_name == 'americastestkitchen' else 'cco'

@ -390,7 +390,7 @@ class AnvatoIE(InfoExtractor):
'countries': smuggled_data.get('geo_countries'), 'countries': smuggled_data.get('geo_countries'),
}) })
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
access_key, video_id = mobj.group('access_key_or_mcp', 'id') access_key, video_id = mobj.group('access_key_or_mcp', 'id')
if access_key not in self._ANVACK_TABLE: if access_key not in self._ANVACK_TABLE:
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get( access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(

@ -42,7 +42,7 @@ class APAIE(InfoExtractor):
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id, base_url = mobj.group('id', 'base_url') video_id, base_url = mobj.group('id', 'base_url')
webpage = self._download_webpage( webpage = self._download_webpage(

@ -94,7 +94,7 @@ class AppleTrailersIE(InfoExtractor):
_JSON_RE = r'iTunes.playURL\((.*?)\);' _JSON_RE = r'iTunes.playURL\((.*?)\);'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
movie = mobj.group('movie') movie = mobj.group('movie')
uploader_id = mobj.group('company') uploader_id = mobj.group('company')

@ -86,7 +86,7 @@ class ArcPublishingIE(InfoExtractor):
return entries return entries
def _real_extract(self, url): def _real_extract(self, url):
org, uuid = re.match(self._VALID_URL, url).groups() org, uuid = self._match_valid_url(url).groups()
for orgs, tmpl in self._POWA_DEFAULTS: for orgs, tmpl in self._POWA_DEFAULTS:
if org in orgs: if org in orgs:
base_api_tmpl = tmpl base_api_tmpl = tmpl

@ -199,7 +199,7 @@ class ARDMediathekIE(ARDMediathekBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
# determine video id from url # determine video id from url
m = re.match(self._VALID_URL, url) m = self._match_valid_url(url)
document_id = None document_id = None
@ -325,7 +325,7 @@ class ARDIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
display_id = mobj.group('id') display_id = mobj.group('id')
player_url = mobj.group('mainurl') + '~playerXml.xml' player_url = mobj.group('mainurl') + '~playerXml.xml'
@ -525,7 +525,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
return self.playlist_result(entries, playlist_title=display_id) return self.playlist_result(entries, playlist_title=display_id)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('video_id') video_id = mobj.group('video_id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id')
if display_id: if display_id:

@ -63,7 +63,7 @@ class ArkenaIE(InfoExtractor):
return mobj.group('url') return mobj.group('url')
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
account_id = mobj.group('account_id') account_id = mobj.group('account_id')

@ -49,7 +49,7 @@ class ArteTVIE(ArteTVBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
lang = mobj.group('lang') or mobj.group('lang_2') lang = mobj.group('lang') or mobj.group('lang_2')
@ -227,7 +227,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
lang, playlist_id = re.match(self._VALID_URL, url).groups() lang, playlist_id = self._match_valid_url(url).groups()
collection = self._download_json( collection = self._download_json(
'%s/collectionData/%s/%s?source=videos' '%s/collectionData/%s/%s?source=videos'
% (self._API_BASE, lang, playlist_id), playlist_id) % (self._API_BASE, lang, playlist_id), playlist_id)

@ -111,7 +111,7 @@ class AsianCrushIE(AsianCrushBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
host, video_id = re.match(self._VALID_URL, url).groups() host, video_id = self._match_valid_url(url).groups()
if host == 'cocoro.tv': if host == 'cocoro.tv':
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
@ -161,7 +161,7 @@ class AsianCrushPlaylistIE(AsianCrushBaseIE):
yield self._parse_video_data(video) yield self._parse_video_data(video)
def _real_extract(self, url): def _real_extract(self, url):
host, playlist_id = re.match(self._VALID_URL, url).groups() host, playlist_id = self._match_valid_url(url).groups()
if host == 'cocoro.tv': if host == 'cocoro.tv':
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
@ -75,7 +74,7 @@ class AtresPlayerIE(InfoExtractor):
self._request_webpage(target_url, None, 'Following Target URL') self._request_webpage(target_url, None, 'Following Target URL')
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = re.match(self._VALID_URL, url).groups() display_id, video_id = self._match_valid_url(url).groups()
try: try:
episode = self._download_json( episode = self._download_json(

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import random import random
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError, try_get, compat_str, str_or_none from ..utils import ExtractorError, try_get, compat_str, str_or_none
@ -124,7 +123,7 @@ class AudiusIE(AudiusBaseIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
track_id = try_get(mobj, lambda x: x.group('track_id')) track_id = try_get(mobj, lambda x: x.group('track_id'))
if track_id is None: if track_id is None:
title = mobj.group('title') title = mobj.group('title')
@ -217,7 +216,7 @@ class AudiusPlaylistIE(AudiusBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
self._select_api_base() self._select_api_base()
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
title = mobj.group('title') title = mobj.group('title')
# uploader = mobj.group('uploader') # uploader = mobj.group('uploader')
url = self._prepare_url(url, title) url = self._prepare_url(url, title)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import base64 import base64
from .common import InfoExtractor from .common import InfoExtractor
@ -22,7 +21,7 @@ class AWAANIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<id>\d+)/(?P<season_id>\d+))?' _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<id>\d+)/(?P<season_id>\d+))?'
def _real_extract(self, url): def _real_extract(self, url):
show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() show_id, video_id, season_id = self._match_valid_url(url).groups()
if video_id and int(video_id) > 0: if video_id and int(video_id) > 0:
return self.url_result( return self.url_result(
'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo')
@ -154,7 +153,7 @@ class AWAANSeasonIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
show_id, season_id = re.match(self._VALID_URL, url).groups() show_id, season_id = self._match_valid_url(url).groups()
data = {} data = {}
if season_id: if season_id:

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from .kaltura import KalturaIE from .kaltura import KalturaIE
@ -51,7 +50,7 @@ class AZMedienIE(InfoExtractor):
_PARTNER_ID = '1719221' _PARTNER_ID = '1719221'
def _real_extract(self, url): def _real_extract(self, url):
host, display_id, article_id, entry_id = re.match(self._VALID_URL, url).groups() host, display_id, article_id, entry_id = self._match_valid_url(url).groups()
if not entry_id: if not entry_id:
entry_id = self._download_json( entry_id = self._download_json(

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unescapeHTML from ..utils import unescapeHTML
@ -33,7 +32,7 @@ class BaiduVideoIE(InfoExtractor):
path, category, playlist_id), playlist_id, note) path, category, playlist_id), playlist_id, note)
def _real_extract(self, url): def _real_extract(self, url):
category, playlist_id = re.match(self._VALID_URL, url).groups() category, playlist_id = self._match_valid_url(url).groups()
if category == 'show': if category == 'show':
category = 'tvshow' category = 'tvshow'
if category == 'tv': if category == 'tv':

@ -294,7 +294,7 @@ class BandcampAlbumIE(BandcampIE):
else super(BandcampAlbumIE, cls).suitable(url)) else super(BandcampAlbumIE, cls).suitable(url))
def _real_extract(self, url): def _real_extract(self, url):
uploader_id, album_id = re.match(self._VALID_URL, url).groups() uploader_id, album_id = self._match_valid_url(url).groups()
playlist_id = album_id or uploader_id playlist_id = album_id or uploader_id
webpage = self._download_webpage(url, playlist_id) webpage = self._download_webpage(url, playlist_id)
tralbum = self._extract_data_attr(webpage, playlist_id) tralbum = self._extract_data_attr(webpage, playlist_id)

@ -40,7 +40,7 @@ class BeatportIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
track_id = mobj.group('id') track_id = mobj.group('id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id')

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import url_basename from ..utils import url_basename
@ -24,7 +23,7 @@ class BehindKinkIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
display_id = mobj.group('id') display_id = mobj.group('id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -78,7 +77,7 @@ class BellMediaIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
domain, video_id = re.match(self._VALID_URL, url).groups() domain, video_id = self._match_valid_url(url).groups()
domain = domain.split('.')[0] domain = domain.split('.')[0]
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',

@ -144,7 +144,7 @@ class BiliBiliIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id_bv') or mobj.group('id') video_id = mobj.group('id_bv') or mobj.group('id')
av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None) av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_iso8601 from ..utils import parse_iso8601
@ -48,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
region = mobj.group('region') region = mobj.group('region')
video_id = mobj.group('id') video_id = mobj.group('id')
info = self._download_json( info = self._download_json(

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs from ..compat import compat_parse_qs
@ -45,7 +44,7 @@ class BokeCCIE(BokeCCBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query')) qs = compat_parse_qs(self._match_valid_url(url).group('query'))
if not qs.get('vid') or not qs.get('uid'): if not qs.get('vid') or not qs.get('uid'):
raise ExtractorError('Invalid URL', expected=True) raise ExtractorError('Invalid URL', expected=True)

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
@ -22,7 +21,7 @@ class BongaCamsIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
host = mobj.group('host') host = mobj.group('host')
channel_id = mobj.group('id') channel_id = mobj.group('id')

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -30,7 +29,7 @@ class BoxIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
shared_name, file_id = re.match(self._VALID_URL, url).groups() shared_name, file_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, file_id) webpage = self._download_webpage(url, file_id)
request_token = self._parse_json(self._search_regex( request_token = self._parse_json(self._search_regex(
r'Box\.config\s*=\s*({.+?});', webpage, r'Box\.config\s*=\s*({.+?});', webpage,

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -86,7 +85,7 @@ class BRIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
base_url, display_id = re.search(self._VALID_URL, url).groups() base_url, display_id = self._match_valid_url(url).groups()
page = self._download_webpage(url, display_id) page = self._download_webpage(url, display_id)
xml_url = self._search_regex( xml_url = self._search_regex(
r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL')

@ -42,7 +42,7 @@ class BravoTVIE(AdobePassIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
site, display_id = re.match(self._VALID_URL, url).groups() site, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
settings = self._parse_json(self._search_regex( settings = self._parse_json(self._search_regex(
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'), r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from .youtube import YoutubeIE from .youtube import YoutubeIE
@ -41,7 +40,7 @@ class BreakIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = re.match(self._VALID_URL, url).groups() display_id, video_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)

@ -290,7 +290,7 @@ class BrightcoveLegacyIE(InfoExtractor):
url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url) url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
# Change bckey (used by bcove.me urls) to playerKey # Change bckey (used by bcove.me urls) to playerKey
url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
query_str = mobj.group('query') query_str = mobj.group('query')
query = compat_urlparse.parse_qs(query_str) query = compat_urlparse.parse_qs(query_str)
@ -595,7 +595,7 @@ class BrightcoveNewIE(AdobePassIE):
'ip_blocks': smuggled_data.get('geo_ip_blocks'), 'ip_blocks': smuggled_data.get('geo_ip_blocks'),
}) })
account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups()
policy_key_id = '%s_%s' % (account_id, player_id) policy_key_id = '%s_%s' % (account_id, player_id)
policy_key = self._downloader.cache.load('brightcove', policy_key_id) policy_key = self._downloader.cache.load('brightcove', policy_key_id)

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -52,7 +51,7 @@ class BYUtvIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import js_to_json from ..utils import js_to_json
@ -31,7 +30,7 @@ class C56IE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) mobj = self._match_valid_url(url)
text_id = mobj.group('textid') text_id = mobj.group('textid')
webpage = self._download_webpage(url, text_id) webpage = self._download_webpage(url, text_id)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -50,7 +49,7 @@ class CanalplusIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
site, display_id, video_id = re.match(self._VALID_URL, url).groups() site, display_id, video_id = self._match_valid_url(url).groups()
site_id = self._SITE_ID_MAP[site] site_id = self._SITE_ID_MAP[site]

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from .gigya import GigyaBaseIE from .gigya import GigyaBaseIE
@ -47,7 +46,7 @@ class CanvasIE(InfoExtractor):
_REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1' _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
site_id, video_id = mobj.group('site_id'), mobj.group('id') site_id, video_id = mobj.group('site_id'), mobj.group('id')
data = None data = None
@ -192,7 +191,7 @@ class CanvasEenIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
site_id, display_id = mobj.group('site_id'), mobj.group('id') site_id, display_id = mobj.group('site_id'), mobj.group('id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .cbs import CBSIE from .cbs import CBSIE
from ..utils import int_or_none from ..utils import int_or_none
@ -71,7 +70,7 @@ class CBSInteractiveIE(CBSIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
site, display_id = re.match(self._VALID_URL, url).groups() site, display_id = self._match_valid_url(url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
data_json = self._html_search_regex( data_json = self._html_search_regex(

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
# from .cbs import CBSBaseIE # from .cbs import CBSBaseIE
from .common import InfoExtractor from .common import InfoExtractor
@ -30,7 +29,7 @@ class CBSSportsEmbedIE(InfoExtractor):
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id) # return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
def _real_extract(self, url): def _real_extract(self, url):
uuid, pcid = re.match(self._VALID_URL, url).groups() uuid, pcid = self._match_valid_url(url).groups()
query = {'id': uuid} if uuid else {'pcid': pcid} query = {'id': uuid} if uuid else {'pcid': pcid}
video = self._download_json( video = self._download_json(
'https://www.cbssports.com/api/content/video/', 'https://www.cbssports.com/api/content/video/',

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import calendar import calendar
import datetime import datetime
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -61,7 +60,7 @@ class CCMAIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
media_type, media_id = re.match(self._VALID_URL, url).groups() media_type, media_id = self._match_valid_url(url).groups()
media = self._download_json( media = self._download_json(
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={

@ -96,7 +96,7 @@ class Channel9IE(InfoExtractor):
return self.playlist_result(entries, video_id, title_text) return self.playlist_result(entries, video_id, title_text)
def _real_extract(self, url): def _real_extract(self, url):
content_path, rss = re.match(self._VALID_URL, url).groups() content_path, rss = self._match_valid_url(url).groups()
if rss: if rss:
return self._extract_list(content_path, url) return self._extract_list(content_path, url)

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
@ -51,7 +50,7 @@ class ChilloutzoneIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .hbo import HBOBaseIE from .hbo import HBOBaseIE
@ -23,7 +22,7 @@ class CinemaxIE(HBOBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
path, video_id = re.match(self._VALID_URL, url).groups() path, video_id = self._match_valid_url(url).groups()
info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id) info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id)
info['id'] = video_id info['id'] = video_id
return info return info

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -30,7 +29,7 @@ class CJSWIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
program, episode_id = mobj.group('program', 'id') program, episode_id = mobj.group('program', 'id')
audio_id = '%s/%s' % (program, episode_id) audio_id = '%s/%s' % (program, episode_id)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import smuggle_url from ..utils import smuggle_url
@ -57,7 +56,7 @@ class CNBCVideoIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
path, display_id = re.match(self._VALID_URL, url).groups() path, display_id = self._match_valid_url(url).groups()
video_id = self._download_json( video_id = self._download_json(
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={ 'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
'query': '''{ 'query': '''{

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from .turner import TurnerBaseIE from .turner import TurnerBaseIE
@ -88,7 +87,7 @@ class CNNIE(TurnerBaseIE):
return None return None
def _real_extract(self, url): def _real_extract(self, url):
sub_domain, path, page_title = re.match(self._VALID_URL, url).groups() sub_domain, path, page_title = self._match_valid_url(url).groups()
if sub_domain not in ('money', 'edition'): if sub_domain not in ('money', 'edition'):
sub_domain = 'edition' sub_domain = 'edition'
config = self._CONFIG[sub_domain] config = self._CONFIG[sub_domain]

@ -447,23 +447,22 @@ class InfoExtractor(object):
self.set_downloader(downloader) self.set_downloader(downloader)
@classmethod @classmethod
def suitable(cls, url): def _match_valid_url(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
# This does not use has/getattr intentionally - we want to know whether # This does not use has/getattr intentionally - we want to know whether
# we have cached the regexp for *this* class, whereas getattr would also # we have cached the regexp for *this* class, whereas getattr would also
# match the superclass # match the superclass
if '_VALID_URL_RE' not in cls.__dict__: if '_VALID_URL_RE' not in cls.__dict__:
cls._VALID_URL_RE = re.compile(cls._VALID_URL) cls._VALID_URL_RE = re.compile(cls._VALID_URL)
return cls._VALID_URL_RE.match(url) is not None return cls._VALID_URL_RE.match(url)
@classmethod
def suitable(cls, url):
"""Receives a URL and returns True if suitable for this IE."""
return cls._match_valid_url(url) is not None
@classmethod @classmethod
def _match_id(cls, url): def _match_id(cls, url):
if '_VALID_URL_RE' not in cls.__dict__: return cls._match_valid_url(url).group('id')
cls._VALID_URL_RE = re.compile(cls._VALID_URL)
m = cls._VALID_URL_RE.match(url)
assert m
return compat_str(m.group('id'))
@classmethod @classmethod
def working(cls): def working(cls):

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -72,4 +71,4 @@ class ViewSourceIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
return self.url_result(re.match(self._VALID_URL, url).group('url')) return self.url_result(self._match_valid_url(url).group('url'))

@ -222,7 +222,7 @@ class CondeNastIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id, player_id, target, url_type, display_id = re.match(self._VALID_URL, url).groups() video_id, player_id, target, url_type, display_id = self._match_valid_url(url).groups()
if video_id: if video_id:
return self._extract_video({ return self._extract_video({

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .theplatform import ThePlatformFeedIE from .theplatform import ThePlatformFeedIE
from ..utils import ( from ..utils import (
@ -96,7 +95,7 @@ class CorusIE(ThePlatformFeedIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
domain, video_id = re.match(self._VALID_URL, url).groups() domain, video_id = self._match_valid_url(url).groups()
site = domain.split('.')[0] site = domain.split('.')[0]
path = self._SITE_MAP.get(site, site) path = self._SITE_MAP.get(site, site)
if path != 'series': if path != 'series':

@ -413,7 +413,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return subtitles return subtitles
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
if mobj.group('prefix') == 'm': if mobj.group('prefix') == 'm':

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
@ -32,7 +31,7 @@ class CultureUnpluggedIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id

@ -204,7 +204,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
return urls return urls
def _real_extract(self, url): def _real_extract(self, url):
video_id, playlist_id = re.match(self._VALID_URL, url).groups() video_id, playlist_id = self._match_valid_url(url).groups()
if playlist_id: if playlist_id:
if not self.get_param('noplaylist'): if not self.get_param('noplaylist'):

@ -38,7 +38,7 @@ class DBTVIE(InfoExtractor):
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = re.match(self._VALID_URL, url).groups() display_id, video_id = self._match_valid_url(url).groups()
info = { info = {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id, 'id': video_id,

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -16,7 +15,7 @@ class DeezerBaseInfoExtractor(InfoExtractor):
if not self.get_param('test'): if not self.get_param('test'):
self.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!') self.report_warning('For now, this extractor only supports the 30 second previews. Patches welcome!')
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
data_id = mobj.group('id') data_id = mobj.group('id')
webpage = self._download_webpage(url, data_id) webpage = self._download_webpage(url, data_id)

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import unified_strdate from ..utils import unified_strdate
@ -23,7 +22,7 @@ class DFBIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
display_id, video_id = re.match(self._VALID_URL, url).groups() display_id, video_id = self._match_valid_url(url).groups()
player_info = self._download_xml( player_info = self._download_xml(
'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id, 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id,

@ -70,7 +70,7 @@ class DigitekaIE(InfoExtractor):
return mobj.group('url') return mobj.group('url')
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
video_type = mobj.group('embed_type') or mobj.group('site_type') video_type = mobj.group('embed_type') or mobj.group('site_type')
if video_type == 'music': if video_type == 'music':

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import random import random
import re
import string import string
from .discoverygo import DiscoveryGoBaseIE from .discoverygo import DiscoveryGoBaseIE
@ -62,7 +61,7 @@ class DiscoveryIE(DiscoveryGoBaseIE):
_API_BASE_URL = 'https://api.discovery.com/v1/' _API_BASE_URL = 'https://api.discovery.com/v1/'
def _real_extract(self, url): def _real_extract(self, url):
site, show_slug, display_id = re.match(self._VALID_URL, url).groups() site, show_slug, display_id = self._match_valid_url(url).groups()
access_token = None access_token = None
cookies = self._get_cookies(url) cookies = self._get_cookies(url)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .dplay import DPlayIE from .dplay import DPlayIE
@ -35,7 +34,7 @@ class DiscoveryNetworksDeIE(DPlayIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
domain, programme, alternate_id = re.match(self._VALID_URL, url).groups() domain, programme, alternate_id = self._match_valid_url(url).groups()
country = 'GB' if domain == 'dplay.co.uk' else 'DE' country = 'GB' if domain == 'dplay.co.uk' else 'DE'
realm = 'questuk' if country == 'GB' else domain.replace('.', '') realm = 'questuk' if country == 'GB' else domain.replace('.', '')
return self._get_disco_api_info( return self._get_disco_api_info(

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from ..compat import compat_str from ..compat import compat_str
from ..utils import try_get from ..utils import try_get
@ -95,5 +94,5 @@ class DiscoveryPlusIndiaShowIE(InfoExtractor):
page_num += 1 page_num += 1
def _real_extract(self, url): def _real_extract(self, url):
show_name = re.match(self._VALID_URL, url).group('show_name') show_name = self._match_valid_url(url).group('show_name')
return self.playlist_result(self._entries(show_name), playlist_id=show_name) return self.playlist_result(self._entries(show_name), playlist_id=show_name)

@ -77,7 +77,7 @@ class DisneyIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
domain, video_id, display_id = re.match(self._VALID_URL, url).groups() domain, video_id, display_id = self._match_valid_url(url).groups()
if not video_id: if not video_id:
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
grill = re.sub(r'"\s*\+\s*"', '', self._search_regex( grill = re.sub(r'"\s*\+\s*"', '', self._search_regex(

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import int_or_none
@ -26,7 +25,7 @@ class DLiveVODIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
uploader_id, vod_id = re.match(self._VALID_URL, url).groups() uploader_id, vod_id = self._match_valid_url(url).groups()
broadcast = self._download_json( broadcast = self._download_json(
'https://graphigo.prd.dlive.tv/', vod_id, 'https://graphigo.prd.dlive.tv/', vod_id,
data=json.dumps({'query': '''query { data=json.dumps({'query': '''query {

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
@ -287,7 +286,7 @@ class DPlayIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
display_id = mobj.group('id') display_id = mobj.group('id')
domain = mobj.group('domain').lstrip('www.') domain = mobj.group('domain').lstrip('www.')
country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -26,7 +25,7 @@ class DRBonanzaIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id, display_id = mobj.group('id', 'display_id') video_id, display_id = mobj.group('id', 'display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)

@ -26,7 +26,7 @@ class DropboxIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
fn = compat_urllib_parse_unquote(url_basename(url)) fn = compat_urllib_parse_unquote(url_basename(url))
title = os.path.splitext(fn)[0] title = os.path.splitext(fn)[0]

@ -42,7 +42,7 @@ class DrTuberIE(InfoExtractor):
webpage) webpage)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from socket import timeout from socket import timeout
from .common import InfoExtractor from .common import InfoExtractor
@ -32,7 +31,7 @@ class DTubeIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
uploader_id, video_id = re.match(self._VALID_URL, url).groups() uploader_id, video_id = self._match_valid_url(url).groups()
result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({ result = self._download_json('https://api.steemit.com/', video_id, data=json.dumps({
'jsonrpc': '2.0', 'jsonrpc': '2.0',
'method': 'get_content', 'method': 'get_content',

@ -186,7 +186,7 @@ class DubokuPlaylistIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
if mobj is None: if mobj is None:
raise ExtractorError('Invalid URL: %s' % url) raise ExtractorError('Invalid URL: %s' % url)
series_id = mobj.group('id') series_id = mobj.group('id')

@ -123,7 +123,7 @@ class EaglePlatformIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id') host, video_id = mobj.group('custom_host') or mobj.group('host'), mobj.group('id')
headers = {} headers = {}

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -48,7 +47,7 @@ class EinthusanIE(InfoExtractor):
)).decode('utf-8'), video_id) )).decode('utf-8'), video_id)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
host = mobj.group('host') host = mobj.group('host')
video_id = mobj.group('id') video_id = mobj.group('id')

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -51,7 +50,7 @@ class EpornerIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -36,7 +35,7 @@ class EveryonesMixtapeIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import hashlib import hashlib
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -138,7 +137,7 @@ class FC2EmbedIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
query = compat_parse_qs(mobj.group('query')) query = compat_parse_qs(mobj.group('query'))
video_id = query['i'][-1] video_id = query['i'][-1]

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -22,7 +21,7 @@ class FilmwebIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
article_type, article_id = re.match(self._VALID_URL, url).groups() article_type, article_id = self._match_valid_url(url).groups()
if article_type == 'filmnytt': if article_type == 'filmnytt':
webpage = self._download_webpage(url, article_id) webpage = self._download_webpage(url, article_id)
article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id') article_id = self._search_regex(r'data-videoid="(\d+)"', webpage, 'article id')

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import int_or_none
@ -66,7 +65,7 @@ class FiveTVIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') or mobj.group('path') video_id = mobj.group('id') or mobj.group('path')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)

@ -41,7 +41,7 @@ class FourTubeBaseIE(InfoExtractor):
return formats return formats
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
kind, video_id, display_id = mobj.group('kind', 'id', 'display_id') kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
if kind == 'm' or not display_id: if kind == 'm' or not display_id:
@ -228,7 +228,7 @@ class PornTubeIE(FourTubeBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id, display_id = mobj.group('id', 'display_id') video_id, display_id = mobj.group('id', 'display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)

@ -67,7 +67,7 @@ class FoxNewsIE(AMPIE):
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
host, video_id = re.match(self._VALID_URL, url).groups() host, video_id = self._match_valid_url(url).groups()
info = self._extract_feed_info( info = self._extract_feed_info(
'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id)) 'http://%s/v/feed/video/%s.js?template=fox' % (host, video_id))

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -222,7 +221,7 @@ class FranceTVIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
catalog = mobj.group('catalog') catalog = mobj.group('catalog')
@ -546,7 +545,7 @@ class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
playlist_id = mobj.group('id') playlist_id = mobj.group('id')
playlist = self._download_json( playlist = self._download_json(

@ -207,7 +207,7 @@ class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
course_name, lesson_name = mobj.group('course_name', 'lesson_name') course_name, lesson_name = mobj.group('course_name', 'lesson_name')
course = self._download_course(course_name, url) course = self._download_course(course_name, url)

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import random import random
import re
import string import string
from .common import InfoExtractor from .common import InfoExtractor
@ -49,7 +48,7 @@ class FunimationPageIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
display_id = mobj.group('id').replace('/', '_') display_id = mobj.group('id').replace('/', '_')
if not mobj.group('lang'): if not mobj.group('lang'):
url = '%s/en/%s' % (mobj.group('origin'), mobj.group('path')) url = '%s/en/%s' % (mobj.group('origin'), mobj.group('path'))
@ -304,7 +303,7 @@ class FunimationShowIE(FunimationIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
base_url, locale, display_id = re.match(self._VALID_URL, url).groups() base_url, locale, display_id = self._match_valid_url(url).groups()
show_info = self._download_json( show_info = self._download_json(
'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=US&deviceType=web&locale=%s' 'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=US&deviceType=web&locale=%s'

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from .nexx import NexxIE from .nexx import NexxIE
@ -31,7 +30,7 @@ class FunkIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id, nexx_id = re.match(self._VALID_URL, url).groups() display_id, nexx_id = self._match_valid_url(url).groups()
video = self._download_json( video = self._download_json(
'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id) 'https://www.funk.net/api/v4.0/videos/' + nexx_id, nexx_id)
return { return {

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
@ -76,7 +75,7 @@ class GaiaIE(InfoExtractor):
self._jwt = auth.get('jwt') self._jwt = auth.get('jwt')
def _real_extract(self, url): def _real_extract(self, url):
display_id, vtype = re.search(self._VALID_URL, url).groups() display_id, vtype = self._match_valid_url(url).groups()
node_id = self._download_json( node_id = self._download_json(
'https://brooklyn.gaia.com/pathinfo', display_id, query={ 'https://brooklyn.gaia.com/pathinfo', display_id, query={
'path': 'video/' + display_id, 'path': 'video/' + display_id,

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -34,7 +33,7 @@ class GameStarIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
site = mobj.group('site') site = mobj.group('site')
video_id = mobj.group('id') video_id = mobj.group('id')

@ -51,7 +51,7 @@ class GaskrankIE(InfoExtractor):
webpage, default=None) or self._html_search_meta( webpage, default=None) or self._html_search_meta(
'title', webpage, fatal=True) 'title', webpage, fatal=True)
categories = [re.match(self._VALID_URL, url).group('categories')] categories = [self._match_valid_url(url).group('categories')]
mobj = re.search( mobj = re.search(
r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])', r'Video von:\s*(?P<uploader_id>[^|]*?)\s*\|\s*vom:\s*(?P<upload_date>[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9])',

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
@ -34,7 +33,7 @@ class GazetaIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
display_id = mobj.group('id') display_id = mobj.group('id')
embed_url = '%s?p=embed' % mobj.group('url') embed_url = '%s?p=embed' % mobj.group('url')

@ -149,7 +149,7 @@ class GDCVaultIE(InfoExtractor):
return start_page return start_page
def _real_extract(self, url): def _real_extract(self, url):
video_id, name = re.match(self._VALID_URL, url).groups() video_id, name = self._match_valid_url(url).groups()
display_id = name or video_id display_id = name or video_id
webpage_url = 'http://www.gdcvault.com/play/' + video_id webpage_url = 'http://www.gdcvault.com/play/' + video_id

@ -143,7 +143,7 @@ class GediDigitalIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
url = re.match(self._VALID_URL, url).group('url') url = self._match_valid_url(url).group('url')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title = self._html_search_meta( title = self._html_search_meta(
['twitter:title', 'og:title'], webpage, fatal=True) ['twitter:title', 'og:title'], webpage, fatal=True)

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import json import json
from .common import InfoExtractor from .common import InfoExtractor
@ -32,7 +31,7 @@ class GiantBombIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
display_id = mobj.group('display_id') display_id = mobj.group('display_id')

@ -161,7 +161,7 @@ class GoIE(AdobePassIE):
display_id)['video'] display_id)['video']
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.') sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
video_id, display_id = mobj.group('id', 'display_id') video_id, display_id = mobj.group('id', 'display_id')
site_info = self._SITE_INFO.get(sub_domain, {}) site_info = self._SITE_INFO.get(sub_domain, {})

@ -1,6 +1,5 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -29,7 +28,7 @@ class GodTubeIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
config = self._download_xml( config = self._download_xml(

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -56,7 +55,7 @@ class GooglePodcastsIE(GooglePodcastsBaseIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
b64_feed_url, b64_guid = re.match(self._VALID_URL, url).groups() b64_feed_url, b64_guid = self._match_valid_url(url).groups()
episode = self._batch_execute( episode = self._batch_execute(
'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1] 'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1]
return self._extract_episode(episode) return self._extract_episode(episode)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -48,7 +47,7 @@ class HearThisAtIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
m = re.match(self._VALID_URL, url) m = self._match_valid_url(url)
display_id = '{artist:s} - {title:s}'.format(**m.groupdict()) display_id = '{artist:s} - {title:s}'.format(**m.groupdict())
api_url = url.replace('www.', '').replace('hearthis.at', 'api-v2.hearthis.at') api_url = url.replace('www.', '').replace('hearthis.at', 'api-v2.hearthis.at')
data_json = self._download_json(api_url, display_id) data_json = self._download_json(api_url, display_id)

@ -1,7 +1,6 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
@ -55,7 +54,7 @@ class HiDiveIE(InfoExtractor):
self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data)) self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data))
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
title, key = mobj.group('title', 'key') title, key = mobj.group('title', 'key')
video_id = '%s/%s' % (title, key) video_id = '%s/%s' % (title, key)

@ -173,7 +173,7 @@ class HotStarIE(HotStarBaseIE):
} }
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('id') video_id = mobj.group('id')
video_type = mobj.group('type') video_type = mobj.group('type')
cookies = self._get_cookies(url) cookies = self._get_cookies(url)

@ -2,7 +2,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..compat import compat_HTTPError
@ -135,7 +134,7 @@ class HRTiIE(HRTiBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
video_id = mobj.group('short_id') or mobj.group('id') video_id = mobj.group('short_id') or mobj.group('id')
display_id = mobj.group('display_id') or video_id display_id = mobj.group('display_id') or video_id
@ -191,7 +190,7 @@ class HRTiPlaylistIE(HRTiBaseIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = self._match_valid_url(url)
category_id = mobj.group('id') category_id = mobj.group('id')
display_id = mobj.group('display_id') or category_id display_id = mobj.group('display_id') or category_id

@ -139,9 +139,9 @@ class HungamaAlbumPlaylistIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, id) webpage = self._download_webpage(url, video_id)
ptrn = r'<meta[^>]+?property=[\"\']?music:song:url[\"\']?[^>]+?content=[\"\']?([^\"\']+)' ptrn = r'<meta[^>]+?property=[\"\']?music:song:url[\"\']?[^>]+?content=[\"\']?([^\"\']+)'
items = re.findall(ptrn, webpage) items = re.findall(ptrn, webpage)
entries = [self.url_result(item, ie=HungamaSongIE.ie_key()) for item in items] entries = [self.url_result(item, ie=HungamaSongIE.ie_key()) for item in items]
return self.playlist_result(entries, id) return self.playlist_result(entries, video_id)

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save