diff --git a/setup.cfg b/setup.cfg index 3b61920..389305e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,9 @@ where = src [options.extras_require] development = nox >= 2021.10.1 + pytest + flake8 + black [options.entry_points] console_scripts = diff --git a/src/tidy_tweet/__init__.py b/src/tidy_tweet/__init__.py index 31f2586..cd190c6 100644 --- a/src/tidy_tweet/__init__.py +++ b/src/tidy_tweet/__init__.py @@ -1,6 +1,9 @@ # flake8: noqa F401 from tidy_tweet.processing import load_twarc_json_to_sqlite -from tidy_tweet.database import initialise_sqlite, check_database_version, \ - SchemaVersionMismatchError, LibraryVersionMismatchWarning +from tidy_tweet.database import ( + initialise_sqlite, + check_database_version, + SchemaVersionMismatchError, + LibraryVersionMismatchWarning, +) from tidy_tweet.tweet_mapping import create_table_statements as database_schema - diff --git a/src/tidy_tweet/__main__.py b/src/tidy_tweet/__main__.py index 3147da1..327da28 100644 --- a/src/tidy_tweet/__main__.py +++ b/src/tidy_tweet/__main__.py @@ -16,20 +16,9 @@ @click.command() -@click.argument( - "database", - type=click.Path(path_type=Path), - required=True -) -@click.argument( - "json_files", - type=click.Path(exists=True), - nargs=-1 -) -def tidy_twarc_jsons( - database: Path, - json_files: Collection[Union[str, PathLike]] -): +@click.argument("database", type=click.Path(path_type=Path), required=True) +@click.argument("json_files", type=click.Path(exists=True), nargs=-1) +def tidy_twarc_jsons(database: Path, json_files: Collection[Union[str, PathLike]]): """ Tidies Twitter json collected with Twarc into relational tables. @@ -57,8 +46,8 @@ def tidy_twarc_jsons( raise click.UsageError(e.message()) from e except sqlite3.DatabaseError as e: raise click.BadParameter( - f"{database} is not a database file.", - param_hint='database') from e + f"{database} is not a database file.", param_hint="database" + ) from e except Exception as e: raise e @@ -79,8 +68,11 @@ def tidy_twarc_jsons( total_pages = total_pages + p click.echo(f"{p} pages of Twitter results loaded from {file}") - click.echo(f"All done! {total_pages} pages of tweets loaded into {database} from {n} files.") + click.echo( + f"All done! {total_pages} pages of tweets loaded into {database} from {n} " + f"files." + ) -if __name__ == '__main__': +if __name__ == "__main__": tidy_twarc_jsons() diff --git a/src/tidy_tweet/database.py b/src/tidy_tweet/database.py index 6e5afb6..8c289ba 100644 --- a/src/tidy_tweet/database.py +++ b/src/tidy_tweet/database.py @@ -19,11 +19,13 @@ def __init__(self, library_schema_version, db_schema_version, db_name, *args): super().__init__(*args) def message(self): - msg = f"Database file {self.db_name} is using tidy_tweet database schema " \ - f"version {self.db_schema_version} but the version of tidy_tweet you " \ - f"are running is using tidy_tweet database schema version " \ - f"{self.db_schema_version}. These versions are not compatible. It is " \ - f"recommended to reprocess all your json files into a fresh database." + msg = ( + f"Database file {self.db_name} is using tidy_tweet database schema " + f"version {self.db_schema_version} but the version of tidy_tweet you " + f"are running is using tidy_tweet database schema version " + f"{self.db_schema_version}. These versions are not compatible. It is " + f"recommended to reprocess all your json files into a fresh database." + ) return msg def __str__(self): @@ -38,12 +40,14 @@ def __init__(self, this_library_version, db_library_version, db_name, *args): super().__init__(*args) def message(self): - msg = f"Database file {self.db_name} contains data processed with tidy_tweet " \ - f"version {self.db_library_version}, but the version of tidy_tweet you " \ - f"are currently using is version {self.library_version}. This is not " \ - f"necessarily incompatible, but if you notice any inconsistencies with " \ - f"how the data is parsed, you may wish to reprocess all your json files "\ - f"into a fresh and consistent database." + msg = ( + f"Database file {self.db_name} contains data processed with tidy_tweet " + f"version {self.db_library_version}, but the version of tidy_tweet you " + f"are currently using is version {self.library_version}. This is not " + f"necessarily incompatible, but if you notice any inconsistencies with " + f"how the data is parsed, you may wish to reprocess all your json files " + f"into a fresh and consistent database." + ) return msg def __str__(self): @@ -102,22 +106,28 @@ def check_database_version(db_name): db = conn.cursor() db.execute( """ - select metadata_value from _metadata + select metadata_value from _metadata where metadata_key='schema_version' - """) + """ + ) result = db.fetchone() or [] db_schema_version = None if len(result) == 0 else result[0] db.execute( """ - select metadata_value from _metadata + select metadata_value from _metadata where metadata_key='tidy_tweet_version' - """) + """ + ) result = db.fetchone() or [] db_library_version = None if len(result) == 0 else result[0] if db_schema_version != mapping.SCHEMA_VERSION: - raise SchemaVersionMismatchError(mapping.SCHEMA_VERSION, db_schema_version, db_name) + raise SchemaVersionMismatchError( + mapping.SCHEMA_VERSION, db_schema_version, db_name + ) if db_library_version != library_version: - warning = LibraryVersionMismatchWarning(library_version, db_library_version, db_name) + warning = LibraryVersionMismatchWarning( + library_version, db_library_version, db_name + ) logger.warning(warning.message()) warn(warning) else: diff --git a/src/tidy_tweet/tweet_mapping.py b/src/tidy_tweet/tweet_mapping.py index 563f8cc..878a7a6 100644 --- a/src/tidy_tweet/tweet_mapping.py +++ b/src/tidy_tweet/tweet_mapping.py @@ -408,9 +408,13 @@ def map_tidy_tweet_metadata() -> Dict[str, List[Dict]]: from tidy_tweet._version import version except ImportError: version = "unspecified" - logger.warn("WARNING: cannot store tidy_tweet version in database as version could not be fetched. If running tidy_tweet from source, try installing package in editable mode.") + logger.warn( + "WARNING: cannot store tidy_tweet version in database as version could not " + "be fetched. If running tidy_tweet from source, try installing package in " + "editable mode." + ) - # TODO: how to handle database with multiple additions of files with different library versions? + # TODO: how to handle db with additions of files with different library versions? return { "_metadata": [ {"metadata_key": "schema_version", "metadata_value": SCHEMA_VERSION}, diff --git a/tests/test_cli.py b/tests/test_cli.py index 9ca40d8..c633b0a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,6 +1,5 @@ from click.testing import CliRunner from tidy_tweet.__main__ import tidy_twarc_jsons -from test_overall import timeline_json_file def test_no_args(): @@ -26,6 +25,3 @@ def test_one_arg(tmp_path): assert result.exit_code != 0 # Just a json file - should fail - - -