-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathraw_indeed.py
36 lines (31 loc) · 1.16 KB
/
raw_indeed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import asyncio
from bs4 import BeautifulSoup as Soup
from seleniumbase.core import sb_cdp
from seleniumbase.undetected import cdp_driver
# Script: open Indeed's company directory in a CDP-driven browser, search for
# one company, open its page, and print the text of its "About" section.
# NOTE(review): block indentation below was restored from a copy whose leading
# whitespace had been stripped -- confirm the `if`/`for` bodies against upstream.

url = "https://www.indeed.com/companies"
company = "NASA Jet Propulsion Laboratory"

# CSS selectors used by the steps below.
search_box = 'input[data-testid="company-search-box"]'
# Presumably the CAPTCHA challenge container (going by the name) -- verify.
captcha_grid = '[style="display: grid;"]'
# Built once and reused for both the presence check and the click.
company_link = 'a:contains("%s")' % company
about_section = '[data-testid="AboutSection-section"]'

# Start the driver, load the URL on a dedicated event loop, and build the
# `sb` helper object from the loop, page and driver.
loop = asyncio.new_event_loop()
driver = cdp_driver.cdp_util.start_sync()
page = loop.run_until_complete(driver.get(url))
sb = sb_cdp.CDPMethods(loop, page, driver)

# Click the captcha element first if it is on the page, then run the search.
if sb.is_element_present(captcha_grid):
    sb.gui_click_element(captcha_grid)
sb.press_keys(search_box, company)
sb.click('button[type="submit"]')

# If the result link is missing, check for the captcha element again before
# attempting the click (presumably it can reappear after submitting -- verify).
if not sb.is_element_present(company_link):
    if sb.is_element_present(captcha_grid):
        sb.gui_click_element(captcha_grid)
sb.click(company_link)
sb.sleep(3)

# Highlight the company name and the "About" heading, pausing after each.
sb.highlight('div[itemprop="name"]')
sb.sleep(1)
sb.highlight('h2:contains("About the company")')
sb.sleep(2)

# Scroll in ten small steps with a short pause between them.
for _ in range(10):
    sb.scroll_down(12)
    sb.sleep(0.14)

info = sb.find_element(about_section)
# Name the parser explicitly: without it Beautiful Soup picks whichever parser
# happens to be installed and emits GuessedAtParserWarning, so the result can
# differ between machines.
# Despite its name, `soup` holds the extracted plain text, one text node per
# line, not a BeautifulSoup object.
soup = Soup(info.get_html(), "html.parser").get_text("\n").strip()
# Re-attach any ":" that the "\n" separator pushed onto the start of a line.
print("*** %s: ***\n%s" % (company, soup.replace("\n:", ":")))