Skip to content

Commit

Permalink
Add provenance info to written report
Browse files (browse the repository at this point in the history)
  • Loading branch information
troyraen committed Jan 25, 2025
1 parent 347357f commit d083658
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
27 changes: 21 additions & 6 deletions src/hats_import/verification/run_verification.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,7 @@
from hats import read_hats
from hats.pixel_math.spatial_index import SPATIAL_INDEX_COLUMN

import hats_import
from hats_import.verification.arguments import VerificationArguments


Expand Down Expand Up @@ -179,8 +180,7 @@ def test_is_valid_catalog(self) -> bool:
-------
bool: True if the test passed, else False.
"""
version = f"hats version {hats.__version__}"
test, description = "valid hats", f"Test hats.io.validation.is_valid_catalog ({version})."
test, description = "valid hats", "Test hats.io.validation.is_valid_catalog."
target = self.args.input_catalog_path
self.print_if_verbose(f"\nStarting: {description}")

Expand Down Expand Up @@ -223,7 +223,8 @@ def test_file_sets(self) -> bool:
def test_num_rows(self) -> bool:
"""Test the number of rows in the dataset. Add `Result`s to `results`.
File footers are compared with _metadata and the user-supplied truth (if provided).
Row counts in parquet file footers are compared with the '_metadata' file,
HATS 'properties' file, and (if provided) the user-supplied truth.
Returns
-------
Expand Down Expand Up @@ -463,11 +464,25 @@ def write_results(self, *, write_mode: Literal["a", "w", "x"] = "a") -> None:
Parameters
----------
write_mode : Literal["a", "w", "x"], optional
Mode to be used when writing output file. Passed to pandas.DataFrame.to_csv as `mode`.
Mode to be used when writing the output file. Options have the typical meanings:
- 'w': truncate the file first
- 'x': exclusive creation, failing if the file already exists
- 'a': append to the end of file if it exists
"""
self.args.output_file_path.parent.mkdir(exist_ok=True, parents=True)
header = not (write_mode == "a" and self.args.output_file_path.exists())
self.results_df.to_csv(self.args.output_file_path, mode=write_mode, header=header, index=False)
# Write provenance info
with open(self.args.output_file_path, write_mode, encoding="utf8") as fout:
fout.writelines(
[
"# HATS verification results for\n",
f"# {self.args.input_catalog_path}\n",
f"# Package versions: hats v{hats.__version__}; hats-import v{hats_import.__version__}\n",
f"# User-supplied truth schema: {self.args.truth_schema}\n",
f"# User-supplied truth total rows: {self.args.truth_total_rows}\n",
]
)
# Write results
self.results_df.to_csv(self.args.output_file_path, mode="a", header=True, index=False)
self.print_if_verbose(f"\nVerifier results written to {self.args.output_file_path}")

def print_if_verbose(self, message):
Expand Down
4 changes: 2 additions & 2 deletions tests/hats_import/verification/test_run_verification.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,15 +24,15 @@ def test_runner(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
)
verifier = runner.run(args, write_mode="w")
assert verifier.all_tests_passed, "good catalog failed"
written_results = pd.read_csv(args.output_path / args.output_filename)
written_results = pd.read_csv(args.output_path / args.output_filename, comment="#")
assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"

args = VerificationArguments(
input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path, verbose=False
)
verifier = runner.run(args, write_mode="w")
assert not verifier.all_tests_passed, "bad catalog passed"
written_results = pd.read_csv(args.output_path / args.output_filename)
written_results = pd.read_csv(args.output_path / args.output_filename, comment="#")
assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"


Expand Down

0 comments on commit d083658

Please sign in to comment.