diff --git a/craigslist/base.py b/craigslist/base.py
index a12f928..08a303c 100644
--- a/craigslist/base.py
+++ b/craigslist/base.py
@@ -126,8 +126,7 @@ def set_logger(self, log_level, init=False):
 
     def is_valid_area(self, area):
         base_url = self.url_templates['base']
-        response = utils.requests_get(base_url % {'site': self.site},
-                                      logger=self.logger)
+        response = utils.requests_get(base_url % {'site': self.site})
         soup = utils.bs(response.content)
         sublinks = soup.find('ul', {'class': 'sublinks'})
         return sublinks and sublinks.find('a', text=area) is not None
@@ -143,8 +142,7 @@ def get_results_approx_count(self, soup=None):
         """
 
         if soup is None:
-            response = utils.requests_get(self.url, params=self.filters,
-                                          logger=self.logger)
+            response = utils.requests_get(self.url, params=self.filters)
             self.logger.info('GET %s', response.url)
             self.logger.info('Response code: %s', response.status_code)
             response.raise_for_status()  # Something failed?
@@ -177,8 +175,7 @@ def get_results(self, limit=None, start=0, sort_by=None, geotagged=False,
 
         while True:
             self.filters['s'] = start
-            response = utils.requests_get(self.url, params=self.filters,
-                                          logger=self.logger)
+            response = utils.requests_get(self.url, params=self.filters)
             self.logger.info('GET %s', response.url)
             self.logger.info('Response code: %s', response.status_code)
             response.raise_for_status()  # Something failed?
@@ -364,7 +361,7 @@ def parse_attrs(self, result):
                 break
 
     def fetch_content(self, url):
-        response = utils.requests_get(url, logger=self.logger)
+        response = utils.requests_get(url)
         self.logger.info('GET %s', response.url)
         self.logger.info('Response code: %s', response.status_code)
 
diff --git a/craigslist/utils.py b/craigslist/utils.py
index 18c4f9c..0971de2 100644
--- a/craigslist/utils.py
+++ b/craigslist/utils.py
@@ -7,6 +7,12 @@
 USER_AGENT = 'Mozilla/5.0'
 
 
+session = requests.Session()
+retries = requests.adapters.Retry(total=5, backoff_factor=0.1)
+session.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries))
+session.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries))
+
+
 def bs(content):
     return BeautifulSoup(content, 'html.parser')
 
@@ -24,20 +30,14 @@ def requests_get(*args, **kwargs):
     a timeout).
     """
 
-    logger = kwargs.pop('logger', None)
 
     # Set default User-Agent header if not defined.
     kwargs.setdefault('headers', {}).setdefault('User-Agent', USER_AGENT)
-    try:
-        return requests.get(*args, **kwargs)
-    except RequestException as exc:
-        if logger:
-            logger.warning('Request failed (%s). Retrying ...', exc)
-        return requests.get(*args, **kwargs)
+    return session.get(*args, **kwargs)
 
 
 def get_all_sites():
-    response = requests.get(ALL_SITES_URL)
+    response = requests_get(ALL_SITES_URL)
     response.raise_for_status()  # Something failed?
     soup = BeautifulSoup(response.content, 'html.parser')
     sites = set()
@@ -52,7 +52,7 @@
 
 
 def get_all_areas(site):
-    response = requests.get(SITE_URL % site)
+    response = requests_get(SITE_URL % site)
     response.raise_for_status()  # Something failed?
     soup = BeautifulSoup(response.content, 'html.parser')
     raw = soup.select('ul.sublinks li a')