Skip to content

Commit

Permalink
Lint!
Browse files: browse the repository at this point in the history
  • Loading branch information
betsybookwyrm committed May 25, 2023
1 parent b8e2f05 commit b805e7d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 45 deletions.
19 changes: 12 additions & 7 deletions generate_schema_diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,15 @@ def parse_table(statement: str) -> Tuple[str, List[Column], str]:
for i, col in enumerate(columns):
if col.name in key_columns:
del columns[i]
columns.insert(i, Column(
name=col.name,
type=col.type,
constraints="primary key " + col.constraints,
comment=col.comment
))
columns.insert(
i,
Column(
name=col.name,
type=col.type,
constraints="primary key " + col.constraints,
comment=col.comment,
),
)
elif " " in column_def:
# This line has at least two words and hence a column definition
column_def = column_def.split()
Expand All @@ -97,7 +100,9 @@ def parse_table(statement: str) -> Tuple[str, List[Column], str]:
return table_name, columns, table_comment


def write_table_as_list(output: TextIO, table_name: str, columns: List[Column], table_comment: str):
def write_table_as_list(
output: TextIO, table_name: str, columns: List[Column], table_comment: str
):
"""
Prints a markdown-formatted list of the table columns
"""
Expand Down
42 changes: 4 additions & 38 deletions src/tidy_tweet/tweet_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,13 +512,13 @@ def map_tweet(
"insert": """
insert into results_page (
file_name,
oldest_id, newest_id, result_count,
oldest_id, newest_id, result_count,
retrieved_at, request_url,
twarc_version, tidy_tweet_version,
additional_metadata
) values (
:file_name,
:oldest_id, :newest_id, :result_count,
:oldest_id, :newest_id, :result_count,
:retrieved_at, :request_url,
:twarc_version, :tidy_tweet_version,
:additional_metadata
Expand All @@ -533,7 +533,8 @@ def map_tweet(
file_name,
min(oldest_id) as oldest_id, -- oldest tweet id in file
max(newest_id) as newest_id, -- newest tweet id in file
sum(result_count) as result_count, -- count given in API response (sum of all page result counts)
sum(result_count) as result_count, -- count given in API response
-- (sum of all page result counts)
max(inserted_at) as inserted_at,
twarc_version,
min(retrieved_at) as retrieved_at_min, -- earliest retrieval time for pages in file
Expand All @@ -542,41 +543,6 @@ def map_tweet(
group by file_name
"""

# def map_file_metadata(filename: str, page_metadata: List[Dict]) -> Dict[str, List[Dict]]:
# metadata = {"file_name": filename}
#
# # Tidy tweet metadata
# metadata["tidy_tweet_version"] = version
#
# # Condense oldest_id, newest_id, result_count, and retrieved_at max/min
# metadata["oldest_id"] = min([page["oldest_id"] for page in page_metadata])
# metadata["newest_id"] = max([page["newest_id"] for page in page_metadata])
# metadata["result_count"] = sum([page["result_count"] for page in page_metadata])
# metadata["retrieved_at_min"] = min([page["retrieved_at"] for page in page_metadata])
# metadata["retrieved_at_max"] = max([page["retrieved_at"] for page in page_metadata])
#
# # Values that are the same for each page
# metadata["twarc_version"] = page_metadata[0]["twarc_version"]
#
# # Extra info
# extras = []
# for page in page_metadata:
# extras.append(page.pop("additional_metadata"))
#
# metadata["additional_metadata"] = dumps(extras)
# metadata["result_info_per_page"] = dumps([
# {
# "oldest_id": page["oldest_id"],
# "newest_id": page["newest_id"],
# "result_count": page["result_count"],
# "retrieved_at": page["retrieved_at"],
# "request_url": page["request_url"]
# }
# for page in page_metadata
# ])
#
# return metadata


def map_page_metadata(
filename: str, page_metadata_json: Dict, twarc_metadata_json: Dict
Expand Down

0 comments on commit b805e7d

Please sign in to comment.