From 8ad8d4041d645272b72eefa53748a84fd8fd2720 Mon Sep 17 00:00:00 2001 From: betsy Date: Tue, 16 Nov 2021 14:36:01 +1000 Subject: [PATCH] packaging! not quite right yet but almost --- LICENSE | 21 +++++++++++++++ MANIFEST.in | 1 + noxfile.py | 14 ++++++++++ pyproject.toml | 10 +++++++ setup.cfg | 26 +++++++++++++++++++ {tidy_tweet => src/tidy_tweet}/__init__.py | 1 + src/tidy_tweet/_version.py | 5 ++++ {tidy_tweet => src/tidy_tweet}/processing.py | 11 ++++---- .../tidy_tweet}/tweet_mapping.py | 9 ++++--- {tidy_tweet => src/tidy_tweet}/utilities.py | 0 tests/test_overall.py | 4 +-- 11 files changed, 92 insertions(+), 10 deletions(-) create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 noxfile.py create mode 100644 pyproject.toml rename {tidy_tweet => src/tidy_tweet}/__init__.py (92%) create mode 100644 src/tidy_tweet/_version.py rename {tidy_tweet => src/tidy_tweet}/processing.py (91%) rename {tidy_tweet => src/tidy_tweet}/tweet_mapping.py (97%) rename {tidy_tweet => src/tidy_tweet}/utilities.py (100%) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b867c70 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 QUT Digital Observatory + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..61a8d5d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include tests/data/*.jsonl tests/data/*.json diff --git a/noxfile.py b/noxfile.py new file mode 100644 index 0000000..e05b9e3 --- /dev/null +++ b/noxfile.py @@ -0,0 +1,14 @@ +import nox + + +@nox.session +def tests(session): + session.install("pytest") + session.run("pytest") + + +@nox.session +def lint(session): + session.install("flake8", "black") + session.run("flake8") + session.run("black", "--check", ".") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..6c1ab8f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel", + "setuptools_scm>=6.2" +] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] +write_to = "src/tidy_tweet/_version.py" diff --git a/setup.cfg b/setup.cfg index abb731d..4067d8d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,29 @@ +[metadata] +name = tidy_tweet +author = QUT Digital Observatory +author_email = digitalobservatory@qut.edu.au +description = Tidies Twitter json collected with Twarc into relational tables +long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/QUT-Digital-Observatory/tidy_tweet +project_urls = + Bug Tracker = https://github.com/QUT-Digital-Observatory/tidy_tweet/issues +classifiers = + Programming Language :: Python :: 3 + License :: OSI Approved :: MIT License + Operating System :: OS Independent + Environment :: Console + Intended Audience :: Science/Research + Topic :: Sociology +[options] +package_dir = + = src +packages = find: +python_requires = >=3.8 + +[options.packages.find] +where = src + [flake8] # Copied from https://sbarnea.com/lint/black/ # Recommend matching the black line length (default 88), diff --git a/tidy_tweet/__init__.py b/src/tidy_tweet/__init__.py similarity index 92% rename from tidy_tweet/__init__.py rename to src/tidy_tweet/__init__.py index 4616f52..b03d90c 100644 --- a/tidy_tweet/__init__.py +++ b/src/tidy_tweet/__init__.py @@ -1,3 +1,4 @@ +# flake8: noqa F401 from tidy_tweet.processing import initialise_sqlite, load_twarc_json_to_sqlite from logging import basicConfig, getLogger from tidy_tweet.tweet_mapping import create_table_statements as database_schema diff --git a/src/tidy_tweet/_version.py b/src/tidy_tweet/_version.py new file mode 100644 index 0000000..fb6e83e --- /dev/null +++ b/src/tidy_tweet/_version.py @@ -0,0 +1,5 @@ +# coding: utf-8 +# file generated by setuptools_scm +# don't change, don't track in version control +version = "0.1.dev14+g2db0a5d.d20211116" +version_tuple = (0, 1, "dev14", "g2db0a5d.d20211116") diff --git a/tidy_tweet/processing.py b/src/tidy_tweet/processing.py similarity index 91% rename from tidy_tweet/processing.py rename to src/tidy_tweet/processing.py index a312ebd..c8da761 100644 --- a/tidy_tweet/processing.py +++ b/src/tidy_tweet/processing.py @@ -1,6 +1,6 @@ import sqlite3 import json -from typing import Union, List, Iterable, Mapping +from typing import Union, Mapping from os import PathLike from pathlib import Path import tidy_tweet.tweet_mapping as mapping @@ -21,10 +21,11 @@ def initialise_sqlite( :param db_name: File path to create a new database at. This is expected to not already exist. - :param allow_existing_database: Only set this to True if you want to add the tidy_tweet - tables to an existing database, such as one where you have other data pre-existing. This - function expects the tidy_tweet tables to not already exist in the database. This - behaviour, while possible, is not currently supported by tidy_tweet. + :param allow_existing_database: Only set this to True if you want to add the + tidy_tweet tables to an existing database, such as one where you have other data + pre-existing. This function expects the tidy_tweet tables to not already exist in + the database. This behaviour, while possible, is not currently supported by + tidy_tweet. """ db_name = Path(db_name) diff --git a/tidy_tweet/tweet_mapping.py b/src/tidy_tweet/tweet_mapping.py similarity index 97% rename from tidy_tweet/tweet_mapping.py rename to src/tidy_tweet/tweet_mapping.py index 84347a6..faf3074 100644 --- a/tidy_tweet/tweet_mapping.py +++ b/src/tidy_tweet/tweet_mapping.py @@ -12,7 +12,8 @@ create table url ( source_id text, -- the id of the object (user or tweet) this URL is included in source_type text, -- "user" or "tweet" - field text not null, -- e.g. "description", "text" - which field of the source object the URL is in + field text not null, -- e.g. "description", "text" - which field of the source + -- object the URL is in url text not null, -- t.co shortened URL expanded_url text not null, display_url text @@ -56,7 +57,8 @@ def map_urls( create table hashtag ( source_id text, -- the id of the object (user or tweet) this hashtag is included in source_type text, -- "user" or "tweet" - field text not null, -- e.g. "description", "text" - which field of the source object the hashtag is in + field text not null, -- e.g. "description", "text" - which field of the source + -- object the hashtag is in tag text not null ) """, @@ -93,7 +95,8 @@ def map_hashtags( create table mention ( source_id text, -- the id of the object (user or tweet) this mention is included in source_type text, -- "user" or "tweet" - field text not null, -- e.g. "description", "text" - which field of the source object the mention is in + field text not null, -- e.g. "description", "text" - which field of the source + -- object the mention is in username text not null -- username of mentioned user ) """, diff --git a/tidy_tweet/utilities.py b/src/tidy_tweet/utilities.py similarity index 100% rename from tidy_tweet/utilities.py rename to src/tidy_tweet/utilities.py diff --git a/tests/test_overall.py b/tests/test_overall.py index 23398fc..45f8a86 100644 --- a/tests/test_overall.py +++ b/tests/test_overall.py @@ -26,8 +26,8 @@ def test_load_timeline(tmp_path): # Check number of tweets db.execute( """ - select directly_collected, count(*) - from tweet + select directly_collected, count(*) + from tweet group by directly_collected; """ )