Skip to content

Commit

Permalink
packaging! not quite right yet but almost
Browse files Browse the repository at this point in the history
  • Loading branch information
betsybookwyrm committed Nov 16, 2021
1 parent 2db0a5d commit 8ad8d40
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 10 deletions.
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 QUT Digital Observatory

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include tests/data/*.jsonl tests/data/*.json
14 changes: 14 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import nox


@nox.session
def tests(session):
    """Run the test suite with pytest inside a fresh nox virtualenv."""
    # The package lives under src/, so it is not importable from the repo
    # root; install it into the session so the tests can import it.
    session.install(".")
    session.install("pytest")
    session.run("pytest")


@nox.session
def lint(session):
    """Lint the code base with flake8 and check black formatting."""
    tools = ("flake8", "black")
    session.install(*tools)

    # flake8 reports style/lint errors.
    session.run("flake8")
    # black only verifies formatting here (--check), it does not rewrite files.
    session.run("black", "--check", ".")
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[build-system]
requires = [
"setuptools>=42",
"wheel",
"setuptools_scm>=6.2"
]
build-backend = "setuptools.build_meta"

[tool.setuptools_scm]
write_to = "src/tidy_tweet/_version.py"
26 changes: 26 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,3 +1,29 @@
[metadata]
name = tidy_tweet
author = QUT Digital Observatory
author_email = [email protected]
description = Tidies Twitter json collected with Twarc into relational tables
long_description = file: README.md
long_description_content_type = text/markdown
url = https://github.com/QUT-Digital-Observatory/tidy_tweet
project_urls =
Bug Tracker = https://github.com/QUT-Digital-Observatory/tidy_tweet/issues
classifiers =
Programming Language :: Python :: 3
License :: OSI Approved :: MIT License
Operating System :: OS Independent
Environment :: Console
Intended Audience :: Science/Research
Topic :: Sociology
[options]
package_dir =
= src
packages = find:
python_requires = >=3.8

[options.packages.find]
where = src

[flake8]
# Copied from https://sbarnea.com/lint/black/
# Recommend matching the black line length (default 88),
Expand Down
1 change: 1 addition & 0 deletions tidy_tweet/__init__.py → src/tidy_tweet/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# flake8: noqa: F401
from tidy_tweet.processing import initialise_sqlite, load_twarc_json_to_sqlite
from logging import basicConfig, getLogger
from tidy_tweet.tweet_mapping import create_table_statements as database_schema
Expand Down
5 changes: 5 additions & 0 deletions src/tidy_tweet/_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# coding: utf-8
# file generated by setuptools_scm
# don't change, don't track in version control
version = "0.1.dev14+g2db0a5d.d20211116"
version_tuple = (0, 1, "dev14", "g2db0a5d.d20211116")
11 changes: 6 additions & 5 deletions tidy_tweet/processing.py → src/tidy_tweet/processing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sqlite3
import json
from typing import Union, List, Iterable, Mapping
from typing import Union, Mapping
from os import PathLike
from pathlib import Path
import tidy_tweet.tweet_mapping as mapping
Expand All @@ -21,10 +21,11 @@ def initialise_sqlite(
:param db_name: File path to create a new database at. This is expected to not
already exist.
:param allow_existing_database: Only set this to True if you want to add the tidy_tweet
tables to an existing database, such as one where you have other data pre-existing. This
function expects the tidy_tweet tables to not already exist in the database. This
behaviour, while possible, is not currently supported by tidy_tweet.
:param allow_existing_database: Only set this to True if you want to add the
tidy_tweet tables to an existing database, such as one where you have other data
pre-existing. This function expects the tidy_tweet tables to not already exist in
the database. This behaviour, while possible, is not currently supported by
tidy_tweet.
"""
db_name = Path(db_name)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
create table url (
source_id text, -- the id of the object (user or tweet) this URL is included in
source_type text, -- "user" or "tweet"
field text not null, -- e.g. "description", "text" - which field of the source object the URL is in
field text not null, -- e.g. "description", "text" - which field of the source
-- object the URL is in
url text not null, -- t.co shortened URL
expanded_url text not null,
display_url text
Expand Down Expand Up @@ -56,7 +57,8 @@ def map_urls(
create table hashtag (
source_id text, -- the id of the object (user or tweet) this hashtag is included in
source_type text, -- "user" or "tweet"
field text not null, -- e.g. "description", "text" - which field of the source object the hashtag is in
field text not null, -- e.g. "description", "text" - which field of the source
-- object the hashtag is in
tag text not null
)
""",
Expand Down Expand Up @@ -93,7 +95,8 @@ def map_hashtags(
create table mention (
source_id text, -- the id of the object (user or tweet) this mention is included in
source_type text, -- "user" or "tweet"
field text not null, -- e.g. "description", "text" - which field of the source object the mention is in
field text not null, -- e.g. "description", "text" - which field of the source
-- object the mention is in
username text not null -- username of mentioned user
)
""",
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions tests/test_overall.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def test_load_timeline(tmp_path):
# Check number of tweets
db.execute(
"""
select directly_collected, count(*)
from tweet
select directly_collected, count(*)
from tweet
group by directly_collected;
"""
)
Expand Down

0 comments on commit 8ad8d40

Please sign in to comment.