From 2eff4ed40ebfe87434ca947c55d8006971247d89 Mon Sep 17 00:00:00 2001
From: Melissa DeLucchi <delucchi@andrew.cmu.edu>
Date: Wed, 22 Jan 2025 08:51:04 -0500
Subject: [PATCH] Add test comparing file footer row counts against catalog properties total rows.

---
 .../verification/run_verification.py           | 18 +++++++++++++++---
 .../wrong_files_and_rows/partition_info.csv    |  2 ++
 tests/data/wrong_files_and_rows/properties     |  8 ++++++++
 .../verification/test_run_verification.py      |  2 +-
 4 files changed, 26 insertions(+), 4 deletions(-)
 create mode 100644 tests/data/wrong_files_and_rows/partition_info.csv
 create mode 100644 tests/data/wrong_files_and_rows/properties

diff --git a/src/hats_import/verification/run_verification.py b/src/hats_import/verification/run_verification.py
index f745a4a6..e2e0931a 100644
--- a/src/hats_import/verification/run_verification.py
+++ b/src/hats_import/verification/run_verification.py
@@ -10,6 +10,7 @@
 import pandas as pd
 import pyarrow as pa
 import pyarrow.dataset as pds
+from hats import read_hats
 from hats.pixel_math.spatial_index import SPATIAL_INDEX_COLUMN
 
 from hats_import.verification.arguments import VerificationArguments
@@ -219,9 +220,20 @@ def test_num_rows(self) -> bool:
         description = "Test that number of rows are equal."
         print(f"\nStarting: {description}")
 
+        catalog_prop_len = read_hats(self.args.input_catalog_path).catalog_info.total_rows
+
         # get the number of rows in each file, indexed by file path. we treat this as truth.
         files_df = self._load_nrows(self.files_ds)
-        files_df_total = f"file footers ({files_df.num_rows.sum():,})"
+        files_df_sum = files_df.num_rows.sum()
+        files_df_total = f"file footers ({files_df_sum:,})"
+
+        target = "file footers vs catalog properties"
+        print(f"\t{target}")
+        passed_cat = catalog_prop_len == files_df_sum
+        _description = f" {files_df_total} vs catalog properties ({catalog_prop_len:,})."
+        self.results.append(
+            Result(passed=passed_cat, test=test, target=target, description=description + _description)
+        )
 
         # check _metadata
         target = "file footers vs _metadata"
@@ -245,7 +257,7 @@ def test_num_rows(self) -> bool:
         if self.args.truth_total_rows is not None:
             target = "file footers vs truth"
             print(f"\t{target}")
-            passed_th = self.args.truth_total_rows == files_df.num_rows.sum()
+            passed_th = self.args.truth_total_rows == files_df_sum
             _description = f" {files_df_total} vs user-provided truth ({self.args.truth_total_rows:,})."
             self.results.append(
                 Result(passed=passed_th, test=test, target=target, description=description + _description)
@@ -253,7 +265,7 @@ def test_num_rows(self) -> bool:
         else:
             passed_th = True  # this test did not fail. this is only needed for the return value.
 
-        all_passed = all([passed_md, passed_th])
+        all_passed = all([passed_md, passed_th, passed_cat])
         print(f"Result: {'PASSED' if all_passed else 'FAILED'}")
         return all_passed
 
diff --git a/tests/data/wrong_files_and_rows/partition_info.csv b/tests/data/wrong_files_and_rows/partition_info.csv
new file mode 100644
index 00000000..bf77935e
--- /dev/null
+++ b/tests/data/wrong_files_and_rows/partition_info.csv
@@ -0,0 +1,2 @@
+Norder,Npix
+0,11
diff --git a/tests/data/wrong_files_and_rows/properties b/tests/data/wrong_files_and_rows/properties
new file mode 100644
index 00000000..801bc768
--- /dev/null
+++ b/tests/data/wrong_files_and_rows/properties
@@ -0,0 +1,8 @@
+#HATS catalog
+obs_collection=wrong_files_and_rows
+dataproduct_type=object
+hats_nrows=600
+hats_col_ra=source_ra
+hats_col_dec=source_dec
+hats_order=2
+
diff --git a/tests/hats_import/verification/test_run_verification.py b/tests/hats_import/verification/test_run_verification.py
index 068ce561..697416e4 100644
--- a/tests/hats_import/verification/test_run_verification.py
+++ b/tests/hats_import/verification/test_run_verification.py
@@ -83,7 +83,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_p
     all_failed = not results.passed.any()
     assert all_failed, "bad catalog passed"
 
-    targets = {"file footers vs _metadata", "file footers vs truth"}
+    targets = {"file footers vs catalog properties", "file footers vs _metadata", "file footers vs truth"}
     assert targets == set(results.target), "wrong targets"
 
     expected_bad_file_names = {