Skip to content

Commit

Permalink
Method to compare summary xlsx reports from CI; better organization o…
Browse files Browse the repository at this point in the history
…f baseline and new results for CI comparisons.
  • Loading branch information
aspeake committed Nov 20, 2024
1 parent ecb8f6a commit 0e46d9f
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 52 deletions.
19 changes: 11 additions & 8 deletions .github/workflows/integration_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,17 @@ jobs:
- name: Compare integration test results
run: |
#FIXME temporarily pull from ci_outputs
git fetch
git show origin/ci_outputs:tests/integration_testing/results/agg_results.json > tests/integration_testing/results/agg_results_master.json
git show origin/ci_outputs:tests/integration_testing/results/ecm_results.json > tests/integration_testing/results/ecm_results_master.json
git show origin/ci_outputs:tests/integration_testing/results/plots/tech_potential/Summary_Data-TP.xlsx > tests/integration_testing/results/plots/tech_potential/Summary_Data-TP_master.xlsx
git show origin/ci_outputs:tests/integration_testing/results/plots/max_adopt_potential/Summary_Data-MAP.xlsx > tests/integration_testing/results/plots/tech_potential/Summary_Data-MAP_master.xlsx
python tests/integration_testing/compare_results.py -d tests/integration_testing/results --baseline_suffix _master
git fetch origin master ci_outputs
branch_name="${{ github.ref }}"
if [[ $(git diff --exit-code origin/master ./tests/integration_testing/results/agg_results.json ./tests/integration_testing/results/ecm_results.json) ]]; then
mkdir tests/integration_testing/base_results
git show origin/ci_outputs:tests/integration_testing/results/agg_results.json > tests/integration_testing/base_results/agg_results.json
git show origin/ci_outputs:tests/integration_testing/results/ecm_results.json > tests/integration_testing/base_results/ecm_results.json
git show origin/ci_outputs:tests/integration_testing/results/plots/tech_potential/Summary_Data-TP.xlsx > tests/integration_testing/base_results/Summary_Data-TP.xlsx
git show origin/ci_outputs:tests/integration_testing/results/plots/max_adopt_potential/Summary_Data-MAP.xlsx > tests/integration_testing/base_results/Summary_Data-MAP.xlsx
python tests/integration_testing/compare_results.py --base-dir tests/integration_testing/base_results --new-dir tests/integration_testing/results
fi
- name: Upload artifacts
uses: actions/upload-artifact@v3
with:
Expand All @@ -85,7 +88,7 @@ jobs:
git pull origin $branch_name
git add ./tests/integration_testing/results/*.json
if [[ $(git diff --cached --exit-code) ]]; then
git add ./tests/integration_testing/results
git add ./tests/integration_testing/results/plots
git config --system user.email "[email protected]"
git config --system user.name "GitHub Action"
git commit -m "Upload results files from CI build"
Expand Down
100 changes: 56 additions & 44 deletions tests/integration_testing/compare_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@ def load_json(file_path):
with open(file_path, 'r') as file:
return json.load(file)

@staticmethod
def load_summary_report(file_path):
    """Read an Excel summary report into a DataFrame

    Args:
        file_path (Path): path to the Excel summary report to read

    Returns:
        pd.DataFrame: the report contents, with the first five columns used as the row index
    """
    index_columns = [0, 1, 2, 3, 4]
    return pd.read_excel(file_path, index_col=index_columns)

def compare_dict_keys(self, dict1, dict2, paths, path='', key_diffs=None):
"""Compares nested keys across two dictionaries by recursively searching each level
Args:
dict1 (dict): dictionary to compare
dict2 (dict): dictionary to compare
dict1 (dict): baseline dictionary to compare
dict2 (dict): new dictionary to compare
paths (list): paths to the original files from which the dictionaries are imported
path (str, optional): current dictionary path at which to compare. Defaults to ''.
key_diffs (pd.DataFrame, optional): existing summary of difference. Defaults to None.
Expand All @@ -37,12 +42,12 @@ def compare_dict_keys(self, dict1, dict2, paths, path='', key_diffs=None):
only_in_dict2 = keys2 - keys1

if only_in_dict1:
new_row = pd.DataFrame({"Results file": paths[0].stem,
new_row = pd.DataFrame({"Results file": f"{paths[0].parent.name}/{paths[0].name}",
"Unique key": str(only_in_dict1),
"Found at": path[2:]}, index=[0])
key_diffs = pd.concat([key_diffs, new_row], ignore_index=True)
if only_in_dict2:
new_row = pd.DataFrame({"Results file": paths[1].stem,
new_row = pd.DataFrame({"Results file": f"{paths[0].parent.name}/{paths[0].name}",
"Unique key": str(only_in_dict2),
"Found at": path[2:]}, index=[0])
key_diffs = pd.concat([key_diffs, new_row], ignore_index=True)
Expand All @@ -62,8 +67,8 @@ def compare_dict_values(self, dict1, dict2, percent_threshold=10, abs_threshold=
values at common paths. Both thresholds must be met to report results.
Args:
dict1 (dict): dictionary to compare
dict2 (dict): dictionary to compare
dict1 (dict): baseline dictionary to compare
dict2 (dict): new dictionary to compare
percent_threshold (int, optional): the percent difference threshold at which
differences are reported. Defaults to 10.
abs_threshold (int, optional): the absolute difference threshold at which differences
Expand Down Expand Up @@ -96,40 +101,55 @@ def write_dict_key_report(self, diff_report, output_path):
if diff_report.empty:
return
diff_report.to_csv(output_path, index=False)
print(f"Wrote dictionary key report to {output_path}")

def write_dict_value_report(self, diff_report, output_path):
    """Write value differences to a csv file; nothing is written when there are none

    Args:
        diff_report (dict): maps each results path to its percent difference
        output_path (Path): csv file to which the report is written
    """
    rows = list(diff_report.items())
    report = pd.DataFrame(data=rows, columns=["Results path", "Percent difference"])
    if not report.empty:
        report.to_csv(output_path, index=False)
        print(f"Wrote dictionary value report to {output_path}")

def compare_jsons(self, json1_path, json2_path, output_dir=None):
    """Compare two jsons and report differences in keys and in values

    Args:
        json1_path (Path): baseline json file to compare
        json2_path (Path): new json file to compare
        output_dir (Path, optional): directory in which the key and value difference
            reports are written. Defaults to None, in which case the reports are
            written alongside json2_path.
    """
    json1 = self.load_json(json1_path)
    json2 = self.load_json(json2_path)

    # Resolve the report directory once, so that a call which omits output_dir gets a
    # real directory instead of failing when the report path is built
    if output_dir is None:
        output_dir = json2_path.parent

    # Compare differences in json keys
    key_diffs = self.compare_dict_keys(json1, json2, [json1_path, json2_path])
    self.write_dict_key_report(key_diffs, output_dir / f"{json2_path.stem}_key_diffs.csv")

    # Compare differences in json values
    val_diffs = self.compare_dict_values(json1, json2)
    self.write_dict_value_report(val_diffs, output_dir / f"{json2_path.stem}_value_diffs.csv")

def compare_summary_reports(self, report1_path, report2_path, output_dir=None):
    """Write the relative change between a baseline and a new summary report to csv

    Args:
        report1_path (Path): baseline summary report (e.g., Summary_Data-TP.xlsx)
        report2_path (Path): new summary report to compare against the baseline
        output_dir (Path, optional): directory in which the csv is written. Defaults to
            None, in which case the csv is written alongside report2_path.
    """
    baseline = self.load_summary_report(report1_path)
    updated = self.load_summary_report(report2_path)

    # Change in each cell relative to its baseline value, rounded to two decimals
    delta = updated - baseline
    change = (delta / baseline).round(2)

    target_dir = report2_path.parent if output_dir is None else output_dir
    output_path = target_dir / f"{report2_path.stem}_percent_diffs.csv"
    change.to_csv(output_path)
    print(f"Wrote Summary_Data percent difference report to {output_path}")


def main():
Expand All @@ -140,38 +160,30 @@ def main():
help="Path to the baseline summary report (Excel file)")
parser.add_argument("--summary-new", type=Path,
help="Path to the new summary report (Excel file)")
parser.add_argument("-d", "--directory", type=Path,
help="Directory containing files to compare")
parser.add_argument("--baseline_suffix", type=str, default="_master",
help="If using the --directory argument, specify the suffix for the "
"baseline files (e.g., '_master')")
parser.add_argument("--new-dir", type=Path, help="Directory containing files to compare")
parser.add_argument("--base-dir", type=Path, help="Directory containing files to compare")
parser.add_argument("--threshold", type=float, default=10,
help="Threshold for percent difference")
args = parser.parse_args()

compare = ScoutCompare()
if args.directory:
if args.base_dir and args.new_dir:
# Compare all files
results_dir = args.directory.resolve()
agg_results_json_base = results_dir / f"agg_results{args.baseline_suffix}.json"
agg_results_json = results_dir / "agg_results.json"
compare.compare_jsons(agg_results_json_base, agg_results_json)

ecm_results_json_base = results_dir / f"ecm_results{args.baseline_suffix}.json"
ecm_results_json = results_dir / "ecm_results.json"
compare.compare_jsons(ecm_results_json_base, ecm_results_json)

plots_dir = results_dir / "plots"
summary_tp_base = plots_dir / "tech_potential" / \
f"Summary_Data-TP{args.baseline_suffix}.xlsx"
summary_tp = plots_dir / "tech_potential" / "Summary_Data-TP.xlsx"
compare.compare_summary_reports(summary_tp_base, summary_tp)

summary_map_base = (plots_dir / "max_adopt_potential" /
f"Summary_Data-MAP{args.baseline_suffix}.xlsx")
summary_map = plots_dir / "tech_potential" / "Summary_Data-MAP.xlsx"
compare.compare_summary_reports(summary_map_base, summary_map)

base_dir = args.base_dir.resolve()
new_dir = args.new_dir.resolve()
agg_json_base = base_dir / "agg_results.json"
agg_json_new = new_dir / "agg_results.json"
compare.compare_jsons(agg_json_base, agg_json_new, output_dir=new_dir)
ecm_json_base = base_dir / "ecm_results.json"
ecm_json_new = new_dir / "ecm_results.json"
compare.compare_jsons(ecm_json_base, ecm_json_new, output_dir=new_dir)

summary_tp_base = base_dir / "Summary_Data-TP.xlsx"
summary_tp_new = new_dir / "plots" / "tech_potential" / "Summary_Data-TP.xlsx"
compare.compare_summary_reports(summary_tp_base, summary_tp_new, output_dir=new_dir)
summary_map_base = base_dir / "Summary_Data-MAP.xlsx"
summary_map_new = new_dir / "plots" / "max_adopt_potential" / "Summary_Data-MAP.xlsx"
compare.compare_summary_reports(summary_map_base, summary_map_new, output_dir=new_dir)
else:
# Compare only as specified by the arguments
if args.json_baseline and args.json_new:
Expand Down

0 comments on commit 0e46d9f

Please sign in to comment.