[crackle] Bypass geo restriction

pull/8/head
Sergey M․ 7 years ago
parent b9f5a41207
commit 7d34016fb0
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

@ -4,20 +4,24 @@ from __future__ import unicode_literals, division
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import (
compat_str,
compat_HTTPError,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
ExtractorError
) )
class CrackleIE(InfoExtractor): class CrackleIE(InfoExtractor):
_GEO_COUNTRIES = ['US']
_VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)' _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
_TEST = { _TEST = {
# geo restricted to CA
'url': 'https://www.crackle.com/andromeda/2502343', 'url': 'https://www.crackle.com/andromeda/2502343',
'info_dict': { 'info_dict': {
'id': '2502343', 'id': '2502343',
@ -46,93 +50,115 @@ class CrackleIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
media = self._download_json( country_code = self._downloader.params.get('geo_bypass_country', None)
'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s' countries = [country_code] if country_code else (
% (video_id, self._GEO_COUNTRIES[0]), video_id, query={ 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
'disableProtocols': 'true',
'format': 'json'
})
title = media['Title'] last_e = None
formats = [] for country in countries:
for e in media['MediaURLs']: try:
if e.get('UseDRM') is True: media = self._download_json(
continue 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
format_url = e.get('Path') % (video_id, country), video_id,
if not format_url or not isinstance(format_url, compat_str): 'Downloading media JSON as %s' % country,
continue 'Unable to download media JSON', query={
ext = determine_ext(format_url) 'disableProtocols': 'true',
if ext == 'm3u8': 'format': 'json'
formats.extend(self._extract_m3u8_formats( })
format_url, video_id, 'mp4', entry_protocol='m3u8_native', except ExtractorError as e:
m3u8_id='hls', fatal=False)) # 401 means geo restriction, trying next country
elif ext == 'mpd': if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
formats.extend(self._extract_mpd_formats( last_e = e
format_url, video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
description = media.get('Description')
duration = int_or_none(media.get(
'DurationInSeconds')) or parse_duration(media.get('Duration'))
view_count = int_or_none(media.get('CountViews'))
average_rating = float_or_none(media.get('UserRating'))
age_limit = parse_age_limit(media.get('Rating'))
genre = media.get('Genre')
release_year = int_or_none(media.get('ReleaseYear'))
creator = media.get('Directors')
artist = media.get('Cast')
if media.get('MediaTypeDisplayValue') == 'Full Episode':
series = media.get('ShowName')
episode = title
season_number = int_or_none(media.get('Season'))
episode_number = int_or_none(media.get('Episode'))
else:
series = episode = season_number = episode_number = None
subtitles = {}
cc_files = media.get('ClosedCaptionFiles')
if isinstance(cc_files, list):
for cc_file in cc_files:
if not isinstance(cc_file, dict):
continue continue
cc_url = cc_file.get('Path') raise
if not cc_url or not isinstance(cc_url, compat_str):
media_urls = media.get('MediaURLs')
if not media_urls or not isinstance(media_urls, list):
continue
title = media['Title']
formats = []
for e in media['MediaURLs']:
if e.get('UseDRM') is True:
continue continue
lang = cc_file.get('Locale') or 'en' format_url = e.get('Path')
subtitles.setdefault(lang, []).append({'url': cc_url}) if not format_url or not isinstance(format_url, compat_str):
thumbnails = []
images = media.get('Images')
if isinstance(images, list):
for image_key, image_url in images.items():
mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
if not mobj:
continue continue
thumbnails.append({ ext = determine_ext(format_url)
'url': image_url, if ext == 'm3u8':
'width': int(mobj.group(1)), formats.extend(self._extract_m3u8_formats(
'height': int(mobj.group(2)), format_url, video_id, 'mp4', entry_protocol='m3u8_native',
}) m3u8_id='hls', fatal=False))
elif ext == 'mpd':
return { formats.extend(self._extract_mpd_formats(
'id': video_id, format_url, video_id, mpd_id='dash', fatal=False))
'title': title, self._sort_formats(formats)
'description': description,
'duration': duration, description = media.get('Description')
'view_count': view_count, duration = int_or_none(media.get(
'average_rating': average_rating, 'DurationInSeconds')) or parse_duration(media.get('Duration'))
'age_limit': age_limit, view_count = int_or_none(media.get('CountViews'))
'genre': genre, average_rating = float_or_none(media.get('UserRating'))
'creator': creator, age_limit = parse_age_limit(media.get('Rating'))
'artist': artist, genre = media.get('Genre')
'release_year': release_year, release_year = int_or_none(media.get('ReleaseYear'))
'series': series, creator = media.get('Directors')
'episode': episode, artist = media.get('Cast')
'season_number': season_number,
'episode_number': episode_number, if media.get('MediaTypeDisplayValue') == 'Full Episode':
'thumbnails': thumbnails, series = media.get('ShowName')
'subtitles': subtitles, episode = title
'formats': formats, season_number = int_or_none(media.get('Season'))
} episode_number = int_or_none(media.get('Episode'))
else:
series = episode = season_number = episode_number = None
subtitles = {}
cc_files = media.get('ClosedCaptionFiles')
if isinstance(cc_files, list):
for cc_file in cc_files:
if not isinstance(cc_file, dict):
continue
cc_url = cc_file.get('Path')
if not cc_url or not isinstance(cc_url, compat_str):
continue
lang = cc_file.get('Locale') or 'en'
subtitles.setdefault(lang, []).append({'url': cc_url})
thumbnails = []
images = media.get('Images')
if isinstance(images, list):
for image_key, image_url in images.items():
mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
if not mobj:
continue
thumbnails.append({
'url': image_url,
'width': int(mobj.group(1)),
'height': int(mobj.group(2)),
})
return {
'id': video_id,
'title': title,
'description': description,
'duration': duration,
'view_count': view_count,
'average_rating': average_rating,
'age_limit': age_limit,
'genre': genre,
'creator': creator,
'artist': artist,
'release_year': release_year,
'series': series,
'episode': episode,
'season_number': season_number,
'episode_number': episode_number,
'thumbnails': thumbnails,
'subtitles': subtitles,
'formats': formats,
}
raise last_e

Loading…
Cancel
Save