[extractor/common] Allow calling _initialize_geo_bypass from extractors (#11970)

This commit is contained in:
Sergey M․ 2017-02-21 23:00:43 +07:00
parent e469ab2528
commit e39b5d4ab8
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -379,12 +379,31 @@ def working(cls):
def initialize(self): def initialize(self):
"""Initializes an instance (authentication, etc).""" """Initializes an instance (authentication, etc)."""
self.__initialize_geo_bypass() self._initialize_geo_bypass(self._GEO_COUNTRIES)
if not self._ready: if not self._ready:
self._real_initialize() self._real_initialize()
self._ready = True self._ready = True
def __initialize_geo_bypass(self): def _initialize_geo_bypass(self, countries):
"""
Initialize geo restriction bypass mechanism.
This method is used to initialize geo bypass mechanism based on faking
X-Forwarded-For HTTP header. A random country from provided country list
is selected and a random IP belonging to this country is generated. This
IP will be passed as X-Forwarded-For HTTP header in all subsequent
HTTP requests.
Method does nothing if no countries are specified.
This method will be used for initial geo bypass mechanism initialization
during the instance initialization with _GEO_COUNTRIES.
You may also manually call it from extractor's code if geo countries
information is not available beforehand (e.g. obtained during
extraction) or for some other reason.
"""
if not countries:
return
if not self._x_forwarded_for_ip: if not self._x_forwarded_for_ip:
country_code = self._downloader.params.get('geo_bypass_country', None) country_code = self._downloader.params.get('geo_bypass_country', None)
# If there is no explicit country for geo bypass specified and # If there is no explicit country for geo bypass specified and
@ -393,8 +412,8 @@ def __initialize_geo_bypass(self):
if (not country_code and if (not country_code and
self._GEO_BYPASS and self._GEO_BYPASS and
self._downloader.params.get('geo_bypass', True) and self._downloader.params.get('geo_bypass', True) and
self._GEO_COUNTRIES): countries):
country_code = random.choice(self._GEO_COUNTRIES) country_code = random.choice(countries)
if country_code: if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._downloader.params.get('verbose', False): if self._downloader.params.get('verbose', False):