[viewlift] replace SnagFilms extractors

- add support for other sites that use the same logic
- improve format extraction and sorting
pull/8/head
remitamine 9 years ago
parent 14638e2915
commit 67167920db

@ -673,10 +673,6 @@ from .smotri import (
SmotriUserIE, SmotriUserIE,
SmotriBroadcastIE, SmotriBroadcastIE,
) )
from .snagfilms import (
SnagFilmsIE,
SnagFilmsEmbedIE,
)
from .snotr import SnotrIE from .snotr import SnotrIE
from .sohu import SohuIE from .sohu import SohuIE
from .soundcloud import ( from .soundcloud import (
@ -879,6 +875,10 @@ from .vidme import (
) )
from .vidzi import VidziIE from .vidzi import VidziIE
from .vier import VierIE, VierVideosIE from .vier import VierIE, VierVideosIE
from .viewlift import (
ViewLiftIE,
ViewLiftEmbedIE,
)
from .viewster import ViewsterIE from .viewster import ViewsterIE
from .viidea import ViideaIE from .viidea import ViideaIE
from .vimeo import ( from .vimeo import (

@ -51,7 +51,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from .dailymotion import DailymotionCloudIE from .dailymotion import DailymotionCloudIE
from .onionstudios import OnionStudiosIE from .onionstudios import OnionStudiosIE
from .snagfilms import SnagFilmsEmbedIE from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE from .pladform import PladformIE
@ -1924,10 +1924,10 @@ class GenericIE(InfoExtractor):
if onionstudios_url: if onionstudios_url:
return self.url_result(onionstudios_url) return self.url_result(onionstudios_url)
# Look for SnagFilms embeds # Look for ViewLift embeds
snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
if snagfilms_url: if viewlift_url:
return self.url_result(snagfilms_url) return self.url_result(viewlift_url)
# Look for JWPlatform embeds # Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage) jwplatform_url = JWPlatformIE._extract_url(webpage)

@ -13,8 +13,12 @@ from ..utils import (
) )
class SnagFilmsEmbedIE(InfoExtractor): class ViewLiftBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?snagfilms\.com/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' _DOMAINS_REGEX = '(?:snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|monumentalsportsnetwork|vayafilm)\.com|kesari\.tv'
class ViewLiftEmbedIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f-]{36})' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{ _TESTS = [{
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500', 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
'md5': '2924e9215c6eff7a55ed35b72276bd93', 'md5': '2924e9215c6eff7a55ed35b72276bd93',
@ -40,7 +44,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
@staticmethod @staticmethod
def _extract_url(webpage): def _extract_url(webpage):
mobj = re.search( mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?snagfilms\.com/embed/player.+?)\1', r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
webpage) webpage)
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
@ -55,6 +59,7 @@ class SnagFilmsEmbedIE(InfoExtractor):
'Film %s is not playable in your area.' % video_id, expected=True) 'Film %s is not playable in your area.' % video_id, expected=True)
formats = [] formats = []
has_bitrate = False
for source in self._parse_json(js_to_json(self._search_regex( for source in self._parse_json(js_to_json(self._search_regex(
r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id): r'(?s)sources:\s*(\[.+?\]),', webpage, 'json')), video_id):
file_ = source.get('file') file_ = source.get('file')
@ -63,22 +68,25 @@ class SnagFilmsEmbedIE(InfoExtractor):
type_ = source.get('type') type_ = source.get('type')
ext = determine_ext(file_) ext = determine_ext(file_)
format_id = source.get('label') or ext format_id = source.get('label') or ext
if all(v == 'm3u8' for v in (type_, ext)): if all(v == 'm3u8' or v == 'hls' for v in (type_, ext)):
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
file_, video_id, 'mp4', m3u8_id='hls')) file_, video_id, 'mp4', m3u8_id='hls'))
else: else:
bitrate = int_or_none(self._search_regex( bitrate = int_or_none(self._search_regex(
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext], [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
file_, 'bitrate', default=None)) file_, 'bitrate', default=None))
if not has_bitrate and bitrate:
has_bitrate = True
height = int_or_none(self._search_regex( height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)) r'^(\d+)[pP]$', format_id, 'height', default=None))
formats.append({ formats.append({
'url': file_, 'url': file_,
'format_id': format_id, 'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
'tbr': bitrate, 'tbr': bitrate,
'height': height, 'height': height,
}) })
self._sort_formats(formats) field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
self._sort_formats(formats, field_preference)
title = self._search_regex( title = self._search_regex(
[r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'], [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
@ -91,8 +99,8 @@ class SnagFilmsEmbedIE(InfoExtractor):
} }
class SnagFilmsIE(InfoExtractor): class ViewLiftIE(ViewLiftBaseIE):
_VALID_URL = r'https?://(?:www\.)?snagfilms\.com/(?:films/title|show)/(?P<id>[^?#]+)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)/(?:films/title|show|(?:news/)?videos?)/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{ _TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life', 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
'md5': '19844f897b35af219773fd63bdec2942', 'md5': '19844f897b35af219773fd63bdec2942',
@ -127,10 +135,16 @@ class SnagFilmsIE(InfoExtractor):
# Film is not available. # Film is not available.
'url': 'http://www.snagfilms.com/show/augie_alone/flirting', 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.winnersview.com/videos/the-good-son',
'only_matching': True,
}, {
'url': 'http://www.kesari.tv/news/video/1461919076414',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) domain, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
@ -170,7 +184,7 @@ class SnagFilmsIE(InfoExtractor):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'http://embed.snagfilms.com/embed/player?filmId=%s' % film_id, 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
'id': film_id, 'id': film_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
@ -178,4 +192,5 @@ class SnagFilmsIE(InfoExtractor):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'duration': duration, 'duration': duration,
'categories': categories, 'categories': categories,
'ie_key': 'ViewLiftEmbed',
} }
Loading…
Cancel
Save