diff --git a/scrape_linkedin/Company.py b/scrape_linkedin/Company.py index 26f1b6f..cd21ecf 100644 --- a/scrape_linkedin/Company.py +++ b/scrape_linkedin/Company.py @@ -29,25 +29,25 @@ def overview(self): # Banner containing company Name + Location banner = one_or_default( - self.overview_soup, '.org-top-card') - + self.overview_soup, 'section.org-top-card') + # Main container with company overview info container = one_or_default( - self.overview_soup, '.org-grid__core-rail--wide') - + self.overview_soup, 'section.artdeco-card.p4.mb3') + overview = {} overview['description'] = container.select_one( 'section > p').get_text().strip() - + metadata_keys = container.select('.org-page-details__definition-term') - print(metadata_keys) + # print(metadata_keys) metadata_keys = [ x for x in metadata_keys if "Company size" not in x.get_text()] - print(metadata_keys) + # print(metadata_keys) metadata_values = container.select( '.org-page-details__definition-text') overview.update( - get_info(banner, {'name': '.org-top-card-summary__title'})) # A fix to the name selector + get_info(banner, {'name': '.t-24.t-black.t-bold'})) # A fix to the name selector overview.update( get_info(container, {'company_size': '.org-about-company-module__company-size-definition-text'})) # Manually added Company size @@ -56,10 +56,10 @@ def overview(self): dict_val = val.get_text().strip() if "company_size" not in dict_key: overview[dict_key] = dict_val - print(overview) + # print(overview) all_employees_links = all_or_default( - banner, '.mt2 > a > span') # A fix to locate "See all ### employees on LinkedIn" + banner, '.mt1 > div > a:nth-of-type(2) > span') # A fix to locate "See all ### employees on LinkedIn" if all_employees_links: all_employees_text = all_employees_links[-1].text diff --git a/scrape_linkedin/CompanyScraper.py b/scrape_linkedin/CompanyScraper.py index 9013c38..ff84cd0 100644 --- a/scrape_linkedin/CompanyScraper.py +++ b/scrape_linkedin/CompanyScraper.py @@ -55,13 +55,13 @@ def load_initial(self, company): def get_overview(self): try: - tab_link = self.driver.find_element_by_css_selector( - 'a[data-control-name="page_member_main_nav_about_tab"]') + tab_link = self.driver.find_elements_by_css_selector( + 'li.org-page-navigation__item.m0')[1] tab_link.click() self.wait_for_el( - 'a[data-control-name="page_member_main_nav_about_tab"].active') + 'section.artdeco-card.p4.mb3') return self.driver.find_element_by_css_selector( - '.organization-outlet').get_attribute('outerHTML') + 'div.scaffold-layout__row.scaffold-layout__content').get_attribute('outerHTML') except: return ''