Remove extractors

[beampro] Remove Extractor (#17290, #22871, #23020, #23061, #26099)
[smotri] Remove extractor (#27358)
pull/280/head
pukkandan 5 years ago
parent 90c9145946
commit ff695879bc

@ -421,7 +421,7 @@ I will add some memorable short links to the binaries so you can download them e
out, youtube-dlc will ask interactively.
-2, --twofactor TWOFACTOR Two-factor authentication code
-n, --netrc Use .netrc authentication data
--video-password PASSWORD Video password (vimeo, smotri, youku)
--video-password PASSWORD Video password (vimeo, youku)
## Adobe Pass Options:
--ap-mso MSO Adobe Pass multiple-system operator (TV

@ -508,8 +508,6 @@
- **mixcloud**
- **mixcloud:playlist**
- **mixcloud:user**
- **Mixer:live**
- **Mixer:vod**
- **MLB**
- **Mnet**
- **MNetTV**
@ -825,10 +823,6 @@
- **Slideshare**
- **SlidesLive**
- **Slutload**
- **smotri**: Smotri.com
- **smotri:broadcast**: Smotri.com broadcasts
- **smotri:community**: Smotri.com community videos
- **smotri:user**: Smotri.com user videos
- **Snotr**
- **Sohu**
- **SonyLIV**

@ -1,194 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
ExtractorError,
clean_html,
compat_str,
float_or_none,
int_or_none,
parse_iso8601,
try_get,
urljoin,
)
class BeamProBaseIE(InfoExtractor):
_API_BASE = 'https://mixer.com/api/v1'
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
def _extract_channel_info(self, chan):
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
return {
'uploader': chan.get('token') or try_get(
chan, lambda x: x['user']['username'], compat_str),
'uploader_id': compat_str(user_id) if user_id else None,
'age_limit': self._RATINGS.get(chan.get('audience')),
}
class BeamProLiveIE(BeamProBaseIE):
IE_NAME = 'Mixer:live'
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://mixer.com/niterhayven',
'info_dict': {
'id': '261562',
'ext': 'mp4',
'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
'thumbnail': r're:https://.*\.jpg$',
'timestamp': 1483477281,
'upload_date': '20170103',
'uploader': 'niterhayven',
'uploader_id': '373396',
'age_limit': 18,
'is_live': True,
'view_count': int,
},
'skip': 'niterhayven is offline',
'params': {
'skip_download': True,
},
}
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
@classmethod
def suitable(cls, url):
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
def _real_extract(self, url):
channel_name = self._match_id(url)
chan = self._download_json(
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
if chan.get('online') is False:
raise ExtractorError(
'{0} is offline'.format(channel_name), expected=True)
channel_id = chan['id']
def manifest_url(kind):
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
formats = self._extract_m3u8_formats(
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
fatal=False)
formats.extend(self._extract_smil_formats(
manifest_url('smil'), channel_name, fatal=False))
self._sort_formats(formats)
info = {
'id': compat_str(chan.get('id') or channel_name),
'title': self._live_title(chan.get('name') or channel_name),
'description': clean_html(chan.get('description')),
'thumbnail': try_get(
chan, lambda x: x['thumbnail']['url'], compat_str),
'timestamp': parse_iso8601(chan.get('updatedAt')),
'is_live': True,
'view_count': int_or_none(chan.get('viewersTotal')),
'formats': formats,
}
info.update(self._extract_channel_info(chan))
return info
class BeamProVodIE(BeamProBaseIE):
IE_NAME = 'Mixer:vod'
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
_TESTS = [{
'url': 'https://mixer.com/willow8714?vod=2259830',
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
'info_dict': {
'id': '2259830',
'ext': 'mp4',
'title': 'willow8714\'s Channel',
'duration': 6828.15,
'thumbnail': r're:https://.*source\.png$',
'timestamp': 1494046474,
'upload_date': '20170506',
'uploader': 'willow8714',
'uploader_id': '6085379',
'age_limit': 13,
'view_count': int,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
'only_matching': True,
}, {
'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
'only_matching': True,
}]
@staticmethod
def _extract_format(vod, vod_type):
if not vod.get('baseUrl'):
return []
if vod_type == 'hls':
filename, protocol = 'manifest.m3u8', 'm3u8_native'
elif vod_type == 'raw':
filename, protocol = 'source.mp4', 'https'
else:
assert False
data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
format_id = [vod_type]
if isinstance(data.get('Height'), compat_str):
format_id.append('%sp' % data['Height'])
return [{
'url': urljoin(vod['baseUrl'], filename),
'format_id': '-'.join(format_id),
'ext': 'mp4',
'protocol': protocol,
'width': int_or_none(data.get('Width')),
'height': int_or_none(data.get('Height')),
'fps': int_or_none(data.get('Fps')),
'tbr': int_or_none(data.get('Bitrate'), 1000),
}]
def _real_extract(self, url):
vod_id = self._match_id(url)
vod_info = self._download_json(
'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
state = vod_info.get('state')
if state != 'AVAILABLE':
raise ExtractorError(
'VOD %s is not available (state: %s)' % (vod_id, state),
expected=True)
formats = []
thumbnail_url = None
for vod in vod_info['vods']:
vod_type = vod.get('format')
if vod_type in ('hls', 'raw'):
formats.extend(self._extract_format(vod, vod_type))
elif vod_type == 'thumbnail':
thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
self._sort_formats(formats)
info = {
'id': vod_id,
'title': vod_info.get('name') or vod_id,
'duration': float_or_none(vod_info.get('duration')),
'thumbnail': thumbnail_url,
'timestamp': parse_iso8601(vod_info.get('createdAt')),
'view_count': int_or_none(vod_info.get('viewsTotal')),
'formats': formats,
}
info.update(self._extract_channel_info(vod_info.get('channel') or {}))
return info

@ -95,10 +95,6 @@ from .bbc import (
BBCCoUkPlaylistIE,
BBCIE,
)
from .beampro import (
BeamProLiveIE,
BeamProVodIE,
)
from .beeg import BeegIE
from .behindkink import BehindKinkIE
from .bellmedia import BellMediaIE
@ -1080,12 +1076,6 @@ from .sky import (
from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE
from .smotri import (
SmotriIE,
SmotriCommunityIE,
SmotriUserIE,
SmotriBroadcastIE,
)
from .snotr import SnotrIE
from .sohu import SohuIE
from .sonyliv import SonyLIVIE

@ -48,7 +48,6 @@ from .ooyala import OoyalaIE
from .rutv import RUTVIE
from .tvc import TVCIE
from .sportbox import SportBoxIE
from .smotri import SmotriIE
from .myvi import MyviIE
from .condenast import CondeNastIE
from .udn import UDNEmbedIE
@ -2786,11 +2785,6 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'))
# Look for embedded smotri.com player
smotri_url = SmotriIE._extract_url(webpage)
if smotri_url:
return self.url_result(smotri_url, 'Smotri')
# Look for embedded Myvi.ru player
myvi_url = MyviIE._extract_url(webpage)
if myvi_url:

@ -1,416 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
import hashlib
import uuid
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
sanitized_Request,
unified_strdate,
urlencode_postdata,
xpath_text,
)
class SmotriIE(InfoExtractor):
IE_DESC = 'Smotri.com'
IE_NAME = 'smotri'
_VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})'
_NETRC_MACHINE = 'smotri'
_TESTS = [
# real video id 2610366
{
'url': 'http://smotri.com/video/view/?id=v261036632ab',
'md5': '02c0dfab2102984e9c5bb585cc7cc321',
'info_dict': {
'id': 'v261036632ab',
'ext': 'mp4',
'title': 'катастрофа с камер видеонаблюдения',
'uploader': 'rbc2008',
'uploader_id': 'rbc08',
'upload_date': '20131118',
'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg',
},
},
# real video id 57591
{
'url': 'http://smotri.com/video/view/?id=v57591cb20',
'md5': '830266dfc21f077eac5afd1883091bcd',
'info_dict': {
'id': 'v57591cb20',
'ext': 'flv',
'title': 'test',
'uploader': 'Support Photofile@photofile',
'uploader_id': 'support-photofile',
'upload_date': '20070704',
'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg',
},
},
# video-password, not approved by moderator
{
'url': 'http://smotri.com/video/view/?id=v1390466a13c',
'md5': 'f6331cef33cad65a0815ee482a54440b',
'info_dict': {
'id': 'v1390466a13c',
'ext': 'mp4',
'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1',
'uploader': 'timoxa40',
'uploader_id': 'timoxa40',
'upload_date': '20100404',
'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg',
},
'params': {
'videopassword': 'qwerty',
},
'skip': 'Video is not approved by moderator',
},
# video-password
{
'url': 'http://smotri.com/video/view/?id=v6984858774#',
'md5': 'f11e01d13ac676370fc3b95b9bda11b0',
'info_dict': {
'id': 'v6984858774',
'ext': 'mp4',
'title': 'Дача Солженицина ПАРОЛЬ 223322',
'uploader': 'psavari1',
'uploader_id': 'psavari1',
'upload_date': '20081103',
'thumbnail': r're:^https?://.*\.jpg$',
},
'params': {
'videopassword': '223322',
},
},
# age limit + video-password, not approved by moderator
{
'url': 'http://smotri.com/video/view/?id=v15408898bcf',
'md5': '91e909c9f0521adf5ee86fbe073aad70',
'info_dict': {
'id': 'v15408898bcf',
'ext': 'flv',
'title': 'этот ролик не покажут по ТВ',
'uploader': 'zzxxx',
'uploader_id': 'ueggb',
'upload_date': '20101001',
'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg',
'age_limit': 18,
},
'params': {
'videopassword': '333'
},
'skip': 'Video is not approved by moderator',
},
# age limit + video-password
{
'url': 'http://smotri.com/video/view/?id=v7780025814',
'md5': 'b4599b068422559374a59300c5337d72',
'info_dict': {
'id': 'v7780025814',
'ext': 'mp4',
'title': 'Sexy Beach (пароль 123)',
'uploader': 'вАся',
'uploader_id': 'asya_prosto',
'upload_date': '20081218',
'thumbnail': r're:^https?://.*\.jpg$',
'age_limit': 18,
},
'params': {
'videopassword': '123'
},
},
# swf player
{
'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500',
'md5': '31099eeb4bc906712c5f40092045108d',
'info_dict': {
'id': 'v9188090500',
'ext': 'mp4',
'title': 'Shakira - Don\'t Bother',
'uploader': 'HannahL',
'uploader_id': 'lisaha95',
'upload_date': '20090331',
'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg',
},
},
]
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(
r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)',
webpage)
if mobj is not None:
return mobj.group('url')
mobj = re.search(
r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s*
<div\s+class="video_image">[^<]+</div>\s*
<div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage)
if mobj is not None:
return 'http://smotri.com/video/view/?id=%s' % mobj.group('id')
def _search_meta(self, name, html, display_name=None):
if display_name is None:
display_name = name
return self._html_search_meta(name, html, display_name)
def _real_extract(self, url):
video_id = self._match_id(url)
video_form = {
'ticket': video_id,
'video_url': '1',
'frame_url': '1',
'devid': 'LoadupFlashPlayer',
'getvideoinfo': '1',
}
video_password = self._downloader.params.get('videopassword')
if video_password:
video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest()
video = self._download_json(
'http://smotri.com/video/view/url/bot/',
video_id, 'Downloading video JSON',
data=urlencode_postdata(video_form),
headers={'Content-Type': 'application/x-www-form-urlencoded'})
video_url = video.get('_vidURL') or video.get('_vidURL_mp4')
if not video_url:
if video.get('_moderate_no'):
raise ExtractorError(
'Video %s has not been approved by moderator' % video_id, expected=True)
if video.get('error'):
raise ExtractorError('Video %s does not exist' % video_id, expected=True)
if video.get('_pass_protected') == 1:
msg = ('Invalid video password' if video_password
else 'This video is protected by a password, use the --video-password option')
raise ExtractorError(msg, expected=True)
title = video['title']
thumbnail = video.get('_imgURL')
upload_date = unified_strdate(video.get('added'))
uploader = video.get('userNick')
uploader_id = video.get('userLogin')
duration = int_or_none(video.get('duration'))
# Video JSON does not provide enough meta data
# We will extract some from the video web page instead
webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id
webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page')
# Warning if video is unavailable
warning = self._html_search_regex(
r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage,
'warning message', default=None)
if warning is not None:
self._downloader.report_warning(
'Video %s may not be available; smotri said: %s ' %
(video_id, warning))
# Adult content
if 'EroConfirmText">' in webpage:
self.report_age_confirmation()
confirm_string = self._html_search_regex(
r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id,
webpage, 'confirm string')
confirm_url = webpage_url + '&confirm=%s' % confirm_string
webpage = self._download_webpage(
confirm_url, video_id,
'Downloading video page (age confirmed)')
adult_content = True
else:
adult_content = False
view_count = self._html_search_regex(
r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>',
webpage, 'view count', fatal=False)
return {
'id': video_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
'upload_date': upload_date,
'uploader_id': uploader_id,
'duration': duration,
'view_count': int_or_none(view_count),
'age_limit': 18 if adult_content else 0,
}
class SmotriCommunityIE(InfoExtractor):
IE_DESC = 'Smotri.com community videos'
IE_NAME = 'smotri:community'
_VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)'
_TEST = {
'url': 'http://smotri.com/community/video/kommuna',
'info_dict': {
'id': 'kommuna',
},
'playlist_mincount': 4,
}
def _real_extract(self, url):
community_id = self._match_id(url)
rss = self._download_xml(
'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id,
community_id, 'Downloading community RSS')
entries = [
self.url_result(video_url.text, SmotriIE.ie_key())
for video_url in rss.findall('./channel/item/link')]
return self.playlist_result(entries, community_id)
class SmotriUserIE(InfoExtractor):
IE_DESC = 'Smotri.com user videos'
IE_NAME = 'smotri:user'
_VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)'
_TESTS = [{
'url': 'http://smotri.com/user/inspector',
'info_dict': {
'id': 'inspector',
'title': 'Inspector',
},
'playlist_mincount': 9,
}]
def _real_extract(self, url):
user_id = self._match_id(url)
rss = self._download_xml(
'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id,
user_id, 'Downloading user RSS')
entries = [self.url_result(video_url.text, 'Smotri')
for video_url in rss.findall('./channel/item/link')]
description_text = xpath_text(rss, './channel/description') or ''
user_nickname = self._search_regex(
'^Видео режиссера (.+)$', description_text,
'user nickname', fatal=False)
return self.playlist_result(entries, user_id, user_nickname)
class SmotriBroadcastIE(InfoExtractor):
IE_DESC = 'Smotri.com broadcasts'
IE_NAME = 'smotri:broadcast'
_VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
_NETRC_MACHINE = 'smotri'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
broadcast_id = mobj.group('id')
broadcast_url = 'http://' + mobj.group('url')
broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page')
if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None:
raise ExtractorError(
'Broadcast %s does not exist' % broadcast_id, expected=True)
# Adult content
if re.search('EroConfirmText">', broadcast_page) is not None:
(username, password) = self._get_login_info()
if username is None:
self.raise_login_required(
'Erotic broadcasts allowed only for registered users')
login_form = {
'login-hint53': '1',
'confirm_erotic': '1',
'login': username,
'password': password,
}
request = sanitized_Request(
broadcast_url + '/?no_redirect=1', urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded')
broadcast_page = self._download_webpage(
request, broadcast_id, 'Logging in and confirming age')
if '>Неверный логин или пароль<' in broadcast_page:
raise ExtractorError(
'Unable to log in: bad username or password', expected=True)
adult_content = True
else:
adult_content = False
ticket = self._html_search_regex(
(r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1',
r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"),
broadcast_page, 'broadcast ticket', group='ticket')
broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
broadcast_password = self._downloader.params.get('videopassword')
if broadcast_password:
broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
broadcast_json_page = self._download_webpage(
broadcast_url, broadcast_id, 'Downloading broadcast JSON')
try:
broadcast_json = json.loads(broadcast_json_page)
protected_broadcast = broadcast_json['_pass_protected'] == 1
if protected_broadcast and not broadcast_password:
raise ExtractorError(
'This broadcast is protected by a password, use the --video-password option',
expected=True)
broadcast_offline = broadcast_json['is_play'] == 0
if broadcast_offline:
raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
rtmp_url = broadcast_json['_server']
mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
if not mobj:
raise ExtractorError('Unexpected broadcast rtmp URL')
broadcast_playpath = broadcast_json['_streamName']
broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
broadcast_thumbnail = broadcast_json.get('_imgURL')
broadcast_title = self._live_title(broadcast_json['title'])
broadcast_description = broadcast_json.get('description')
broadcaster_nick = broadcast_json.get('nick')
broadcaster_login = broadcast_json.get('login')
rtmp_conn = 'S:%s' % uuid.uuid4().hex
except KeyError:
if protected_broadcast:
raise ExtractorError('Bad broadcast password', expected=True)
raise ExtractorError('Unexpected broadcast JSON')
return {
'id': broadcast_id,
'url': rtmp_url,
'title': broadcast_title,
'thumbnail': broadcast_thumbnail,
'description': broadcast_description,
'uploader': broadcaster_nick,
'uploader_id': broadcaster_login,
'age_limit': 18 if adult_content else 0,
'ext': 'flv',
'play_path': broadcast_playpath,
'player_url': 'http://pics.smotri.com/broadcast_play.swf',
'app': broadcast_app,
'rtmp_live': True,
'rtmp_conn': rtmp_conn,
'is_live': True,
}

@ -373,7 +373,7 @@ def parseOpts(overrideArguments=None):
authentication.add_option(
'--video-password',
dest='videopassword', metavar='PASSWORD',
help='Video password (vimeo, smotri, youku)')
help='Video password (vimeo, youku)')
adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
adobe_pass.add_option(

Loading…
Cancel
Save