Improve geo bypass mechanism

* Rename options so that their prefix matches --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction
pull/10807/head
Sergey M․ 8 years ago committed by Sergey M
parent 0a840f584c
commit 4248dad92b

@ -323,10 +323,15 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
_BYPASS_GEO attribute may be set to False in order to disable
_GEO_BYPASS attribute may be set to False in order to disable
geo restriction bypass mechanisms for a particular extractor.
Though it won't disable explicit geo restriction bypass based on
country code provided with geo_bypass_country.
country code provided with geo_bypass_country. (experimental)
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
countries for this extractor. One of these countries will be used by
geo restriction bypass mechanism right away in order to bypass
geo restriction, of course, if the mechanism is not disabled. (experimental)
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
@ -335,7 +340,8 @@ class InfoExtractor(object):
_ready = False
_downloader = None
_x_forwarded_for_ip = None
_BYPASS_GEO = True
_GEO_BYPASS = True
_GEO_COUNTRIES = None
_WORKING = True
def __init__(self, downloader=None):
@ -370,13 +376,27 @@ class InfoExtractor(object):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
self.__initialize_geo_bypass()
if not self._ready:
self._real_initialize()
self._ready = True
def __initialize_geo_bypass(self):
if not self._x_forwarded_for_ip:
country_code = self._downloader.params.get('geo_bypass_country', None)
# If there is no explicit country for geo bypass specified and
# the extractor is known to be geo restricted let's fake IP
# as X-Forwarded-For right away.
if (not country_code and
self._GEO_BYPASS and
self._downloader.params.get('geo_bypass', True) and
self._GEO_COUNTRIES):
country_code = random.choice(self._GEO_COUNTRIES)
if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if not self._ready:
self._real_initialize()
self._ready = True
if self._downloader.params.get('verbose', False):
self._downloader.to_stdout(
'[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
@ -389,16 +409,8 @@ class InfoExtractor(object):
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
return ie_result
except GeoRestrictedError as e:
if (not self._downloader.params.get('geo_bypass_country', None) and
self._BYPASS_GEO and
self._downloader.params.get('geo_bypass', True) and
not self._x_forwarded_for_ip and
e.countries):
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
if self._x_forwarded_for_ip:
self.report_warning(
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
continue
if self.__maybe_fake_ip_and_retry(e.countries):
continue
raise
except ExtractorError:
raise
@ -407,6 +419,19 @@ class InfoExtractor(object):
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e)
def __maybe_fake_ip_and_retry(self, countries):
if (not self._downloader.params.get('geo_bypass_country', None) and
self._GEO_BYPASS and
self._downloader.params.get('geo_bypass', True) and
not self._x_forwarded_for_ip and
countries):
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
if self._x_forwarded_for_ip:
self.report_warning(
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
return True
return False
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
self._downloader = downloader

@ -20,6 +20,7 @@ from ..utils import (
class DramaFeverBaseIE(AMPIE):
_LOGIN_URL = 'https://www.dramafever.com/accounts/login/'
_NETRC_MACHINE = 'dramafever'
_GEO_COUNTRIES = ['US', 'CA']
_CONSUMER_SECRET = 'DA59dtVXYLxajktV'
@ -118,7 +119,7 @@ class DramaFeverIE(DramaFeverBaseIE):
if isinstance(e.cause, compat_HTTPError):
self.raise_geo_restricted(
msg='Currently unavailable in your country',
countries=['US', 'CA'])
countries=self._GEO_COUNTRIES)
raise
series_id, episode_number = video_id.split('.')

@ -37,6 +37,7 @@ class GoIE(AdobePassIE):
}
}
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
_GEO_COUNTRIES = ['US']
_TESTS = [{
'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx',
'info_dict': {
@ -104,7 +105,7 @@ class GoIE(AdobePassIE):
for error in errors:
if error.get('code') == 1002:
self.raise_geo_restricted(
error['message'], countries=['US'])
error['message'], countries=self._GEO_COUNTRIES)
error_message = ', '.join([error['message'] for error in errors])
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
asset_url += '?' + entitlement['uplynkData']['sessionKey']

@ -24,6 +24,7 @@ from ..utils import (
class ITVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
_GEO_COUNTRIES = ['GB']
_TEST = {
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
'info_dict': {
@ -101,7 +102,8 @@ class ITVIE(InfoExtractor):
fault_code = xpath_text(resp_env, './/faultcode')
fault_string = xpath_text(resp_env, './/faultstring')
if fault_code == 'InvalidGeoRegion':
self.raise_geo_restricted(msg=fault_string, countries=['GB'])
self.raise_geo_restricted(
msg=fault_string, countries=self._GEO_COUNTRIES)
raise ExtractorError('%s said: %s' % (self.IE_NAME, fault_string))
title = xpath_text(playlist, 'EpisodeTitle', fatal=True)
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)

@ -14,6 +14,7 @@ from ..utils import (
class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO']
def _real_extract(self, url):
video_id = self._match_id(url)
@ -93,7 +94,8 @@ class NRKBaseIE(InfoExtractor):
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'), countries=['NO'])
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),

@ -10,6 +10,7 @@ from ..utils import (
class OnDemandKoreaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ondemandkorea\.com/(?P<id>[^/]+)\.html'
_GEO_COUNTRIES = ['US', 'CA']
_TEST = {
'url': 'http://www.ondemandkorea.com/ask-us-anything-e43.html',
'info_dict': {
@ -36,7 +37,7 @@ class OnDemandKoreaIE(InfoExtractor):
if 'msg_block_01.png' in webpage:
self.raise_geo_restricted(
msg='This content is not available in your region',
countries=['US', 'CA'])
countries=self._GEO_COUNTRIES)
if 'This video is only available to ODK PLUS members.' in webpage:
raise ExtractorError(

@ -193,6 +193,8 @@ class PBSIE(InfoExtractor):
)
''' % '|'.join(list(zip(*_STATIONS))[0])
_GEO_COUNTRIES = ['US']
_TESTS = [
{
'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
@ -492,7 +494,8 @@ class PBSIE(InfoExtractor):
message = self._ERRORS.get(
redirect_info['http_code'], redirect_info['message'])
if redirect_info['http_code'] == 403:
self.raise_geo_restricted(msg=message, countries=['US'])
self.raise_geo_restricted(
msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, message), expected=True)

@ -14,7 +14,8 @@ from ..utils import (
class SRGSSRIE(InfoExtractor):
_VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
_BYPASS_GEO = False
_GEO_BYPASS = False
_GEO_COUNTRIES = ['CH']
_ERRORS = {
'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
@ -43,7 +44,8 @@ class SRGSSRIE(InfoExtractor):
if media_data.get('block') and media_data['block'] in self._ERRORS:
message = self._ERRORS[media_data['block']]
if media_data['block'] == 'GEOBLOCK':
self.raise_geo_restricted(msg=message, countries=['CH'])
self.raise_geo_restricted(
msg=message, countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, message), expected=True)

@ -13,6 +13,7 @@ from ..utils import (
class SVTBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['SE']
def _extract_video(self, video_info, video_id):
formats = []
for vr in video_info['videoReferences']:
@ -39,7 +40,8 @@ class SVTBaseIE(InfoExtractor):
})
if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
self.raise_geo_restricted(
'This video is only available in Sweden', countries=['SE'])
'This video is only available in Sweden',
countries=self._GEO_COUNTRIES)
self._sort_formats(formats)
subtitles = {}

@ -20,6 +20,7 @@ class Vbox7IE(InfoExtractor):
)
(?P<id>[\da-fA-F]+)
'''
_GEO_COUNTRIES = ['BG']
_TESTS = [{
'url': 'http://vbox7.com/play:0946fff23c',
'md5': 'a60f9ab3a3a2f013ef9a967d5f7be5bf',
@ -78,7 +79,7 @@ class Vbox7IE(InfoExtractor):
video_url = video['src']
if '/na.mp4' in video_url:
self.raise_geo_restricted(countries=['BG'])
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
uploader = video.get('uploader')

@ -14,7 +14,7 @@ from ..utils import (
class VGTVIE(XstreamIE):
IE_DESC = 'VGTV, BTTV, FTV, Aftenposten and Aftonbladet'
_BYPASS_GEO = False
_GEO_BYPASS = False
_HOST_TO_APPNAME = {
'vgtv.no': 'vgtv',
@ -218,7 +218,8 @@ class VGTVIE(XstreamIE):
properties = try_get(
data, lambda x: x['streamConfiguration']['properties'], list)
if properties and 'geoblocked' in properties:
raise self.raise_geo_restricted(countries=['NO'])
raise self.raise_geo_restricted(
countries=[host.rpartition('.')[-1].partition('/')[0].upper()])
self._sort_formats(info['formats'])

@ -27,7 +27,7 @@ class VikiBaseIE(InfoExtractor):
_APP_VERSION = '2.2.5.1428709186'
_APP_SECRET = '-$iJ}@p7!G@SyU/je1bEyWg}upLu-6V6-Lg9VD(]siH,r.,m-r|ulZ,U4LC/SeR)'
_BYPASS_GEO = False
_GEO_BYPASS = False
_NETRC_MACHINE = 'viki'
_token = None

@ -3291,7 +3291,7 @@ class GeoUtils(object):
addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
addr_max = addr_min | (0xffffffff >> int(preflen))
return compat_str(socket.inet_ntoa(
compat_struct_pack('!I', random.randint(addr_min, addr_max))))
compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):

Loading…
Cancel
Save