diff --git a/conf_template.py b/conf_template.py index 4ade435..2fff0d1 100644 --- a/conf_template.py +++ b/conf_template.py @@ -3,10 +3,11 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -logging.getLogger('scrapy').setLevel(logging.WARNING) +logging.getLogger('scrapy').setLevel(logging.FATAL) -logging.getLogger('selenium').setLevel(logging.WARNING) +logging.getLogger('selenium').setLevel(logging.FATAL) +logging.getLogger('urllib3').setLevel(logging.FATAL) EMAIL = '' PASSWORD = '' diff --git a/linkedin/middlewares.py b/linkedin/middlewares.py index e650ca5..a07a03e 100644 --- a/linkedin/middlewares.py +++ b/linkedin/middlewares.py @@ -20,7 +20,7 @@ def process_request(self, request, spider): get_by_xpath(driver, profile_xpath) print('SeleniumMiddleware - retrieving body') - # body = to_bytes(driver.page_source) # body must be of type bytes - return Response(driver.current_url) - # HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request) + body = to_bytes(driver.page_source) # body must be of type bytes + #return Response(driver.current_url) + return HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request) diff --git a/linkedin/spiders/selenium.py b/linkedin/spiders/selenium.py index 9348ee1..b7064be 100644 --- a/linkedin/spiders/selenium.py +++ b/linkedin/spiders/selenium.py @@ -113,7 +113,7 @@ def extracts_see_all_url(driver): :return: String: The "See All" URL. """ print('Searching for the "See all * employees on LinkedIn" btn') - see_all_xpath = f'//a/strong[starts-with(text(),"{SEE_ALL_PLACEHOLDER}")]' + see_all_xpath = f'//*[starts-with(text(),"{SEE_ALL_PLACEHOLDER}")]' see_all_elem = get_by_xpath(driver, see_all_xpath) see_all_ex_text = see_all_elem.text @@ -135,7 +135,7 @@ def extracts_linkedin_users(driver, company): for i in range(1, 11): print(f'loading {i}th user') - last_result_xpath = f'//li[{i}]/div/div[@class="search-result__wrapper"]' + last_result_xpath = f'//li[{i}]/*/div[@class="search-result__wrapper"]' result = get_by_xpath_or_none(driver, last_result_xpath) if result is not None: