From d0949df933f098a7d0aa086f54a0d65af5df891b Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Thu, 25 Jun 2020 14:33:20 -0400 Subject: [PATCH] Retweets changes This commit changes the behavior of the command line client so that it can also fetch retweets from a file of tweet_ids. twarc retweets ids.txt > retweets.jsonl This is in addition to the previous behavior where it accepted a tweet id to fetch the retweets for: twarc retweets 20 > retweets.jsonl You can also comma separate them: twarc retweets 20,21 > retweets.jsonl The internal interface to the Twarc.retweets method has changed to now accept an iterator of tweet ids. from twarc import Twarc twitter = Twarc() for tweet in twitter.retweets([20, 21]): print(tweet['id_str']) ... If you have been using the retweets method in your code you will want to adjust it to pass in a list of ids rather than the bare ids. Fixes #255 --- .gitignore | 1 + README.md | 4 ++++ setup.py | 2 +- test_twarc.py | 8 +++++++- twarc/__init__.py | 2 +- twarc/client.py | 28 ++++++++++++++++++---------- twarc/command.py | 10 +++++++++- 7 files changed, 41 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index ee5cb0b9..a36adc0b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ dist twarc.egg-info .pytest_cache .vscode +.env diff --git a/README.md b/README.md index 0b850fd0..fbb332aa 100644 --- a/README.md +++ b/README.md @@ -238,6 +238,10 @@ You can get retweets for a given tweet id like so: twarc retweets 824077910927691778 > retweets.jsonl +If you have a file of tweet_ids that you would like to fetch the retweets for you can: + + twarc retweets ids.txt > retweets.jsonl + ### Replies Unfortunately Twitter's API does not currently support getting replies to a diff --git a/setup.py b/setup.py index d8648b86..6d560cfd 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import setup # Also in twarc/__init__.py -__version__ = '1.9.1' +__version__ = '1.10.0' with open("README.md") as f: long_description = f.read() 
diff --git a/test_twarc.py b/test_twarc.py index 55559ea8..d786033f 100644 --- a/test_twarc.py +++ b/test_twarc.py @@ -537,7 +537,13 @@ def test_http_error_filter(): def test_retweets(): - assert len(list(T.retweets('795972820413140992'))) == 2 + # hopefully there will continue to be more than 100 retweets of these + assert len(list(T.retweets(['20', '21']))) > 100 + + +def test_missing_retweets(): + # this tweet doesn't exist and cannot have any retweets + assert len(list(T.retweets(['795972820413140991']))) == 0 def test_oembed(): diff --git a/twarc/__init__.py b/twarc/__init__.py index abc117db..9c49fab2 100644 --- a/twarc/__init__.py +++ b/twarc/__init__.py @@ -1,4 +1,4 @@ -__version__ = '1.9.1' # also in setup.py +__version__ = '1.10.0' # also in setup.py from .client import Twarc from .command import main diff --git a/twarc/client.py b/twarc/client.py index 99daa245..66ac9058 100644 --- a/twarc/client.py +++ b/twarc/client.py @@ -656,18 +656,26 @@ def tweet(self, tweet_id): except StopIteration: return [] - def retweets(self, tweet_id): + def retweets(self, tweet_ids): """ - Retrieves up to the last 100 retweets for the provided - tweet. + Retrieves up to the last 100 retweets for the provided iterator of tweet_ids. 
""" - log.info("retrieving retweets of %s", tweet_id) - url = "https://api.twitter.com/1.1/statuses/retweets/""{}.json".format( - tweet_id) - - resp = self.get(url, params={"count": 100}) - for tweet in resp.json(): - yield tweet + if not isinstance(tweet_ids, types.GeneratorType): + tweet_ids = iter(tweet_ids) + + for tweet_id in tweet_ids: + if hasattr(tweet_id, 'strip'): + tweet_id = tweet_id.strip() + log.info("retrieving retweets of %s", tweet_id) + url = "https://api.twitter.com/1.1/statuses/retweets/""{}.json".format( + tweet_id) + try: + resp = self.get(url, params={"count": 100}, allow_404=True) + for tweet in resp.json(): + yield tweet + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + log.info("can't get tweets for non-existent tweet: %s", tweet_id) def trends_available(self): """ diff --git a/twarc/command.py b/twarc/command.py index 7c0f94ab..3eea47f1 100644 --- a/twarc/command.py +++ b/twarc/command.py @@ -188,7 +188,15 @@ def stop(signal, frame): things = t.timeline(**kwargs) elif command == "retweets": - things = t.retweets(query) + if os.path.isfile(query): + iterator = fileinput.FileInput( + query, + mode='r', + openhook=fileinput.hook_compressed, + ) + things = t.retweets(tweet_ids=iterator) + else: + things = t.retweets(tweet_ids=query.split(',')) elif command == "users": if os.path.isfile(query):