diff --git a/README.md b/README.md index 18c8fbb..f8da18a 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ sudo apt-get install vinagre Copy `conf_template.py` in `conf.py` and fill the quotes with your credentials. ###### 2. Run and build containers with docker-compose: -Only linkedin spider, not the companies spider. +Only linkedin random spider, not the companies spider. Open your terminal, move to the project folder and type: ```bash diff --git a/docker-compose.yml b/docker-compose.yml index 161ccd3..bafc135 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,7 @@ version: '3' services: - web: - command: ["./wait-for-selenium.sh", "http://selenium:4444/wd/hub", "--", "scrapy", "crawl", "linkedin"] + scrapy: + command: ["./wait-for-selenium.sh", "http://selenium:4444/wd/hub", "--", "scrapy", "crawl", "random"] environment: - PYTHONUNBUFFERED=0 build: diff --git a/linkedin/spiders/search.py b/linkedin/spiders/search.py index 33d1380..623b58a 100644 --- a/linkedin/spiders/search.py +++ b/linkedin/spiders/search.py @@ -1,10 +1,12 @@ +import copy import time -from scrapy import Spider from scrapy import Request +from scrapy import Spider from linkedin.spiders.selenium import get_by_xpath_or_none, SeleniumSpiderMixin + """ Number of seconds to wait checking if the page is a "No Result" type. """ @@ -57,7 +59,7 @@ def parser_search_results_page(self, response): yield Request(url=next_url, callback=self.parser_search_results_page, - meta=response.meta, + meta=copy.deepcopy(response.meta), dont_filter=True, ) diff --git a/linkedin/tests/selenium.py b/linkedin/tests/selenium.py index f4ab045..dfa8028 100644 --- a/linkedin/tests/selenium.py +++ b/linkedin/tests/selenium.py @@ -1,5 +1,7 @@ import unittest +import pytest + from linkedin.spiders.selenium import init_chromium, login @@ -15,10 +17,12 @@ def tearDown(self): class ChromiumTest(SeleniumTest): + @pytest.mark.skip def test_init(self): self.assertIsNotNone(self.driver) print("type: %s" % type(self.driver)) + @pytest.mark.skip def test_login(self): login(self.driver) diff --git a/requirements/production.txt b/requirements/production.txt index 7bec11b..15796f7 100644 --- a/requirements/production.txt +++ b/requirements/production.txt @@ -3,11 +3,7 @@ Scrapy>=1.5.1 # pyup: < 2.0 # https://github.com/scrapy/scrapy # Selenium selenium>=3.14.0 # pyup: < 4.0 # https://github.com/SeleniumHQ/selenium - -# Testing -# todo: move them to a dedicated req file -pytest==5.4.1 -pytest-sugar==0.9.2 +scrapy-selenium==0.0.7 # Linkedin API library -e git+https://github.com/tomquirk/linkedin-api.git@f5962d05e92d135b1be21146a5ce9d41eaf6d423#egg=linkedin_api \ No newline at end of file diff --git a/requirements/testing.txt b/requirements/testing.txt new file mode 100644 index 0000000..7c00f20 --- /dev/null +++ b/requirements/testing.txt @@ -0,0 +1,3 @@ +# Testing +pytest==5.4.1 +pytest-sugar==0.9.2 \ No newline at end of file