From a98bcbf43fb80716be6aa2d0b29dfc4832c09853 Mon Sep 17 00:00:00 2001 From: PetersonE1 Date: Mon, 30 Oct 2023 19:00:56 -0400 Subject: [PATCH] Use chromedriver-autoinstaller-fix to download the correct version of chromedriver for the latest Chrome versions; scrape() now requires an env file and logs in, because X requires an account to view Tweets/Xs --- Scweet/scweet.py | 6 ++++-- Scweet/utils.py | 4 ++-- requirements.txt | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Scweet/scweet.py b/Scweet/scweet.py index d541c79..eb5d693 100644 --- a/Scweet/scweet.py +++ b/Scweet/scweet.py @@ -6,11 +6,11 @@ import random import pandas as pd -from .utils import init_driver, get_last_date_from_csv, log_search_page, keep_scroling, dowload_images +from .utils import init_driver, get_last_date_from_csv, log_search_page, keep_scroling, dowload_images, log_in -def scrape(since, until=None, words=None, to_account=None, from_account=None, mention_account=None, interval=5, lang=None, +def scrape(env, since, until=None, words=None, to_account=None, from_account=None, mention_account=None, interval=5, lang=None, headless=True, limit=float("inf"), display_type="Top", resume=False, proxy=None, hashtag=None, show_images=False, save_images=False, save_dir="outputs", filter_replies=False, proximity=False, geocode=None, minreplies=None, minlikes=None, minretweets=None): @@ -69,6 +69,8 @@ def scrape(since, until=None, words=None, to_account=None, from_account=None, me show_images = True # initiate the driver driver = init_driver(headless, proxy, show_images) + # logs in since X now requires an account to view + log_in(driver, env) # resume scraping from previous work if resume: since = str(get_last_date_from_csv(path))[:10] diff --git a/Scweet/utils.py b/Scweet/utils.py index 3048cee..85ac389 100644 --- a/Scweet/utils.py +++ b/Scweet/utils.py @@ -3,7 +3,7 @@ import re from time import sleep import random -import chromedriver_autoinstaller +import 
chromedriver_autoinstaller_fix import geckodriver_autoinstaller from selenium.common.exceptions import NoSuchElementException from selenium import webdriver @@ -129,7 +129,7 @@ def init_driver(headless=True, proxy=None, show_images=False, option=None, firef driver_path = geckodriver_autoinstaller.install() else: options = ChromeOptions() - driver_path = chromedriver_autoinstaller.install() + driver_path = chromedriver_autoinstaller_fix.install() if headless is True: print("Scraping on headless mode.") diff --git a/requirements.txt b/requirements.txt index 80406d7..6ef93af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,6 @@ selenium==4.2.0 pandas python-dotenv chromedriver-autoinstaller +chromedriver-autoinstaller-fix geckodriver-autoinstaller urllib3