Skip to content

Commit

Permalink
fixed linkedin spider, minimized logging quantity
Browse files Browse the repository at this point in the history
  • Loading branch information
eracle committed Dec 9, 2019
1 parent b0ba5cf commit 9a8e919
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 7 deletions.
5 changes: 3 additions & 2 deletions conf_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

-logging.getLogger('scrapy').setLevel(logging.WARNING)
+logging.getLogger('scrapy').setLevel(logging.FATAL)

-logging.getLogger('selenium').setLevel(logging.WARNING)
+logging.getLogger('selenium').setLevel(logging.FATAL)

+logging.getLogger('urllib3').setLevel(logging.FATAL)

EMAIL = ''
PASSWORD = ''
6 changes: 3 additions & 3 deletions linkedin/middlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def process_request(self, request, spider):
get_by_xpath(driver, profile_xpath)

print('SeleniumMiddleware - retrieving body')
-# body = to_bytes(driver.page_source) # body must be of type bytes
-return Response(driver.current_url)
-# HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request)
+body = to_bytes(driver.page_source) # body must be of type bytes
+#return Response(driver.current_url)
+return HtmlResponse(driver.current_url, body=body, encoding='utf-8', request=request)

4 changes: 2 additions & 2 deletions linkedin/spiders/selenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def extracts_see_all_url(driver):
:return: String: The "See All" URL.
"""
print('Searching for the "See all * employees on LinkedIn" btn')
-see_all_xpath = f'//a/strong[starts-with(text(),"{SEE_ALL_PLACEHOLDER}")]'
+see_all_xpath = f'//*[starts-with(text(),"{SEE_ALL_PLACEHOLDER}")]'
see_all_elem = get_by_xpath(driver, see_all_xpath)
see_all_ex_text = see_all_elem.text

Expand All @@ -135,7 +135,7 @@ def extracts_linkedin_users(driver, company):
for i in range(1, 11):
print(f'loading {i}th user')

-last_result_xpath = f'//li[{i}]/div/div[@class="search-result__wrapper"]'
+last_result_xpath = f'//li[{i}]/*/div[@class="search-result__wrapper"]'

result = get_by_xpath_or_none(driver, last_result_xpath)
if result is not None:
Expand Down

0 comments on commit 9a8e919

Please sign in to comment.