Skip to content

Commit

Permalink
blacken
Browse files Browse the repository at this point in the history
  • Loading branch information
betsybookwyrm committed Feb 21, 2022
1 parent 530d05f commit 1ca16bb
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 44 deletions.
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ where = src
[options.extras_require]
development =
nox >= 2021.10.1
pytest
flake8
black

[options.entry_points]
console_scripts =
Expand Down
9 changes: 6 additions & 3 deletions src/tidy_tweet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# flake8: noqa F401
from tidy_tweet.processing import load_twarc_json_to_sqlite
from tidy_tweet.database import initialise_sqlite, check_database_version, \
SchemaVersionMismatchError, LibraryVersionMismatchWarning
from tidy_tweet.database import (
initialise_sqlite,
check_database_version,
SchemaVersionMismatchError,
LibraryVersionMismatchWarning,
)
from tidy_tweet.tweet_mapping import create_table_statements as database_schema

28 changes: 10 additions & 18 deletions src/tidy_tweet/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,9 @@


@click.command()
@click.argument(
"database",
type=click.Path(path_type=Path),
required=True
)
@click.argument(
"json_files",
type=click.Path(exists=True),
nargs=-1
)
def tidy_twarc_jsons(
database: Path,
json_files: Collection[Union[str, PathLike]]
):
@click.argument("database", type=click.Path(path_type=Path), required=True)
@click.argument("json_files", type=click.Path(exists=True), nargs=-1)
def tidy_twarc_jsons(database: Path, json_files: Collection[Union[str, PathLike]]):
"""
Tidies Twitter json collected with Twarc into relational tables.
Expand Down Expand Up @@ -57,8 +46,8 @@ def tidy_twarc_jsons(
raise click.UsageError(e.message()) from e
except sqlite3.DatabaseError as e:
raise click.BadParameter(
f"{database} is not a database file.",
param_hint='database') from e
f"{database} is not a database file.", param_hint="database"
) from e
except Exception as e:
raise e

Expand All @@ -79,8 +68,11 @@ def tidy_twarc_jsons(
total_pages = total_pages + p
click.echo(f"{p} pages of Twitter results loaded from {file}")

click.echo(f"All done! {total_pages} pages of tweets loaded into {database} from {n} files.")
click.echo(
f"All done! {total_pages} pages of tweets loaded into {database} from {n} "
f"files."
)


if __name__ == '__main__':
if __name__ == "__main__":
tidy_twarc_jsons()
44 changes: 27 additions & 17 deletions src/tidy_tweet/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@ def __init__(self, library_schema_version, db_schema_version, db_name, *args):
super().__init__(*args)

def message(self):
    """
    Build the human-readable explanation of a schema version mismatch.

    Reads ``db_name``, ``db_schema_version`` and ``library_schema_version``
    from the exception instance.

    :return: A single-line message naming the database file, the schema
        version recorded in it, and the schema version used by the running
        tidy_tweet library.
    """
    # The library's own schema version must be reported here: repeating
    # db_schema_version would show two identical numbers and hide the mismatch.
    # NOTE(review): assumes __init__ stores its ``library_schema_version``
    # argument under the same attribute name - confirm against the constructor.
    msg = (
        f"Database file {self.db_name} is using tidy_tweet database schema "
        f"version {self.db_schema_version} but the version of tidy_tweet you "
        f"are running is using tidy_tweet database schema version "
        f"{self.library_schema_version}. These versions are not compatible. It is "
        f"recommended to reprocess all your json files into a fresh database."
    )
    return msg

def __str__(self):
Expand All @@ -38,12 +40,14 @@ def __init__(self, this_library_version, db_library_version, db_name, *args):
super().__init__(*args)

def message(self):
    """
    Build the human-readable explanation of a library version mismatch.

    Reads ``db_name``, ``db_library_version`` and ``library_version`` from the
    warning instance.

    :return: A single-line message naming the database file, the tidy_tweet
        version recorded in it, and the tidy_tweet version currently in use.
    """
    # Built once: adjacent f-string literals inside the parentheses are
    # concatenated into a single string.
    return (
        f"Database file {self.db_name} contains data processed with tidy_tweet "
        f"version {self.db_library_version}, but the version of tidy_tweet you "
        f"are currently using is version {self.library_version}. This is not "
        f"necessarily incompatible, but if you notice any inconsistencies with "
        f"how the data is parsed, you may wish to reprocess all your json files "
        f"into a fresh and consistent database."
    )

def __str__(self):
Expand Down Expand Up @@ -102,22 +106,28 @@ def check_database_version(db_name):
db = conn.cursor()
db.execute(
"""
select metadata_value from _metadata
select metadata_value from _metadata
where metadata_key='schema_version'
""")
"""
)
result = db.fetchone() or []
db_schema_version = None if len(result) == 0 else result[0]
db.execute(
"""
select metadata_value from _metadata
select metadata_value from _metadata
where metadata_key='tidy_tweet_version'
""")
"""
)
result = db.fetchone() or []
db_library_version = None if len(result) == 0 else result[0]
if db_schema_version != mapping.SCHEMA_VERSION:
raise SchemaVersionMismatchError(mapping.SCHEMA_VERSION, db_schema_version, db_name)
raise SchemaVersionMismatchError(
mapping.SCHEMA_VERSION, db_schema_version, db_name
)
if db_library_version != library_version:
warning = LibraryVersionMismatchWarning(library_version, db_library_version, db_name)
warning = LibraryVersionMismatchWarning(
library_version, db_library_version, db_name
)
logger.warning(warning.message())
warn(warning)
else:
Expand Down
8 changes: 6 additions & 2 deletions src/tidy_tweet/tweet_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,13 @@ def map_tidy_tweet_metadata() -> Dict[str, List[Dict]]:
from tidy_tweet._version import version
except ImportError:
version = "unspecified"
logger.warn("WARNING: cannot store tidy_tweet version in database as version could not be fetched. If running tidy_tweet from source, try installing package in editable mode.")
logger.warn(
"WARNING: cannot store tidy_tweet version in database as version could not "
"be fetched. If running tidy_tweet from source, try installing package in "
"editable mode."
)

# TODO: how to handle database with multiple additions of files with different library versions?
# TODO: how to handle db with additions of files with different library versions?
return {
"_metadata": [
{"metadata_key": "schema_version", "metadata_value": SCHEMA_VERSION},
Expand Down
4 changes: 0 additions & 4 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from click.testing import CliRunner
from tidy_tweet.__main__ import tidy_twarc_jsons
from test_overall import timeline_json_file


def test_no_args():
Expand All @@ -26,6 +25,3 @@ def test_one_arg(tmp_path):
assert result.exit_code != 0

# Just a json file - should fail



0 comments on commit 1ca16bb

Please sign in to comment.