Skip to content

Commit

Permalink
Merge pull request #5 from shaikhsajid1111/retweet-incorrect-link-fix
Browse files: browse the repository at this point in the history
Retweet incorrect link fix and upgraded version
  • Loading branch information
shaikhsajid1111 authored Apr 3, 2022
2 parents 60616a7 + 1036599 commit 69a758d
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 17 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setuptools.setup(
name="twitter_scraper_selenium",
version="0.1.2",
version="0.1.3",
author="Sajid Shaikh",
author_email="[email protected]",
description="Python package to scrap twitter's front-end easily with selenium",
Expand Down
16 changes: 8 additions & 8 deletions twitter_scraper_selenium/element_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,8 @@ def __find_shares(tweet):
@staticmethod
def __find_status(tweet):
try:
anchors = Finder.__find_all_anchor_tags(tweet)
status = "NA"
if len(anchors) > 2:
status = anchors[3].get_attribute("href").split("/")
return status
anchor = tweet.find_element_by_css_selector("a.r-bcqeeo.r-3s2u2q.r-qvutc0")
return (anchor.get_attribute("href").split("/"), anchor.get_attribute("href"))
except Exception as ex:
print("Error at method find_status on line no. {} : {}".format(frameinfo.f_lineno, ex))
return []
Expand Down Expand Up @@ -132,7 +129,7 @@ def __find_videos(tweet):
@staticmethod
def __is_retweet(tweet):
try:
tweet.find_element_by_css_selector('[role="presentation"]')
tweet.find_element_by_css_selector('div.r-92ng3h.r-qvutc0')
return True
except NoSuchElementException:
return False
Expand All @@ -142,12 +139,15 @@ def __is_retweet(tweet):
return False

@staticmethod
def __find_name_from_post(tweet):
def __find_name_from_post(tweet,is_retweet=False):
try:
name = "NA"
anchors = Finder.__find_all_anchor_tags(tweet)
if len(anchors) > 2:
name = anchors[1].text.split("\n")[0]
if is_retweet:
name = anchors[2].text.strip()
else:
name = anchors[1].text.split("\n")[0]
return name
except Exception as ex:
print("Error at method __find_name_from_post on line no. {} : {}".format(
Expand Down
5 changes: 2 additions & 3 deletions twitter_scraper_selenium/keyword.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,10 @@ def __fetch_and_store_data(self):
while len(self.posts_data) < self.tweets_count:
for tweet in present_tweets:
name = Finder._Finder__find_name_from_post(tweet)
status = Finder._Finder__find_status(tweet)
status,tweet_url = Finder._Finder__find_status(tweet)
replies = Finder._Finder__find_replies(tweet)
retweets = Finder._Finder__find_shares(tweet)
username = status[3]
username = tweet_url.split("/")[3]
status = status[-1]
is_retweet = Finder._Finder__is_retweet(tweet)
posted_time = Finder._Finder__find_timestamp(tweet)
Expand All @@ -66,7 +66,6 @@ def __fetch_and_store_data(self):
hashtags = re.findall(r"#(\w+)", content)
mentions = re.findall(r"@(\w+)", content)
profile_picture = "https://twitter.com/{}/photo".format(username)
tweet_url = "https://twitter.com/{}/status/{}".format(username,status)
link = Finder._Finder__find_external_link(tweet)

self.posts_data[status] = {
Expand Down
9 changes: 4 additions & 5 deletions twitter_scraper_selenium/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ def __fetch_and_store_data(self):

while len(self.posts_data) < self.tweets_count:
for tweet in present_tweets:
status = Finder._Finder__find_status(tweet)
name = Finder._Finder__find_name_from_post(tweet)
status,tweet_url = Finder._Finder__find_status(tweet)
replies = Finder._Finder__find_replies(tweet)
retweets = Finder._Finder__find_shares(tweet)
username = status[3]
status = status[-1]
username = tweet_url.split("/")[3]
is_retweet = True if self.twitter_username.lower() != username.lower() else False
retweet_link = Finder._Finder__find_all_anchor_tags(tweet)[2].get_attribute("href") if is_retweet is True else ""
name = Finder._Finder__find_name_from_post(tweet,is_retweet)
retweet_link = tweet_url if is_retweet is True else ""
posted_time = Finder._Finder__find_timestamp(tweet)
content = Finder._Finder__find_content(tweet)
likes = Finder._Finder__find_like(tweet)
Expand All @@ -66,7 +66,6 @@ def __fetch_and_store_data(self):
hashtags = re.findall(r"#(\w+)", content)
mentions = re.findall(r"@(\w+)", content)
profile_picture = "https://twitter.com/{}/photo".format(username)
tweet_url = "https://twitter.com/{}/status/{}".format(username,status)
link = Finder._Finder__find_external_link(tweet)
self.posts_data[status] = {
"tweet_id" : status,
Expand Down

0 comments on commit 69a758d

Please sign in to comment.