From 04bb127b03e38a2979ca4789b5b153dfafee6535 Mon Sep 17 00:00:00 2001 From: Elizabeth Alpert Date: Mon, 22 May 2023 12:58:39 +1000 Subject: [PATCH] lint --- src/tidy_tweet/database.py | 5 ++++- src/tidy_tweet/processing.py | 6 ++++-- src/tidy_tweet/tweet_mapping.py | 18 ++++++++++++------ tests/test_metadata.py | 1 + 4 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/tidy_tweet/database.py b/src/tidy_tweet/database.py index feafb0a..60f24fa 100644 --- a/src/tidy_tweet/database.py +++ b/src/tidy_tweet/database.py @@ -95,7 +95,10 @@ def initialise_sqlite( logger.debug("Created database tables: " + str(created_tables)) assert len(created_tables) == len(mapping.create_table_statements) cursor.execute("create table schema_version (schema_version text)") - cursor.execute("insert into schema_version values (:version)", {"version": mapping.SCHEMA_VERSION}) + cursor.execute( + "insert into schema_version values (:version)", + {"version": mapping.SCHEMA_VERSION}, + ) logger.info("The database schema has been initialised") diff --git a/src/tidy_tweet/processing.py b/src/tidy_tweet/processing.py index 13c422f..c612f0c 100644 --- a/src/tidy_tweet/processing.py +++ b/src/tidy_tweet/processing.py @@ -9,7 +9,9 @@ logger = getLogger(__name__) -def _load_page_object(file_name: str, page_json: Mapping, connection: sqlite3.Connection): +def _load_page_object( + file_name: str, page_json: Mapping, connection: sqlite3.Connection +): """ Takes a page of twarc Twitter API results and loads it into the database. @@ -31,7 +33,7 @@ def _load_page_object(file_name: str, page_json: Mapping, connection: sqlite3.Co # Write this first so we can get the page id db.execute( mapping.sql_by_table["results_page"]["insert"], - mapping.map_page_metadata(file_name, twitter_metadata, twarc_metadata) + mapping.map_page_metadata(file_name, twitter_metadata, twarc_metadata), ) page_info = (file_name, db.lastrowid) diff --git a/src/tidy_tweet/tweet_mapping.py b/src/tidy_tweet/tweet_mapping.py index 6808ec2..c67d440 100644 --- a/src/tidy_tweet/tweet_mapping.py +++ b/src/tidy_tweet/tweet_mapping.py @@ -358,7 +358,7 @@ def map_user(user_json, source_file, page_id) -> Dict[str, List[Dict]]: "location": user_json.get("location", None), "pinned_tweet_id": user_json.get("pinned_tweet_id", None), "source_file": source_file, - "page_id": page_id + "page_id": page_id, } mappings = {"user": [user_map]} @@ -434,7 +434,9 @@ def map_user(user_json, source_file, page_id) -> Dict[str, List[Dict]]: } -def map_tweet(tweet_json, directly_collected: bool, source_file: str, page_id) -> Dict[str, List[Dict]]: +def map_tweet( + tweet_json, directly_collected: bool, source_file: str, page_id +) -> Dict[str, List[Dict]]: tweet_map = { "id": tweet_json["id"], "author_id": tweet_json["author_id"], @@ -452,7 +454,7 @@ def map_tweet(tweet_json, directly_collected: bool, source_file: str, page_id) - "retweet_count": tweet_json["public_metrics"]["retweet_count"], "directly_collected": directly_collected, "source_file": source_file, - "page_id": page_id + "page_id": page_id, } if "in_reply_to_user_id" in tweet_json: @@ -521,9 +523,11 @@ def map_tweet(tweet_json, directly_collected: bool, source_file: str, page_id) - :twarc_version, :tidy_tweet_version, :additional_metadata ) - """ + """, } -sql_views["results_file"] = """ +sql_views[ + "results_file" +] = """ create view results_file as select file_name, @@ -574,7 +578,9 @@ def map_tweet(tweet_json, directly_collected: bool, source_file: str, page_id) - # return metadata -def map_page_metadata(filename:str, page_metadata_json: Dict, twarc_metadata_json: Dict) -> Dict: +def map_page_metadata( + filename: str, page_metadata_json: Dict, twarc_metadata_json: Dict +) -> Dict: metadata = {"file_name": filename} # Tidy tweet metadata diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 4c106b7..a3a3bf6 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -8,4 +8,5 @@ def test_get_tidy_tweet_version(): assert version != "unknown" and version != "unspecified" + # TODO: metadata test