Commits (19)
299aa9a
changing var name per suggestion
robertandremitchell Dec 5, 2025
204c037
Sample accuracy evaluation function
robertandremitchell Dec 5, 2025
111567a
tinkering with second-degree match logic
robertandremitchell Dec 8, 2025
f20d3d3
dynamic file naming, creating utils.py, continuied thinking on accura…
robertandremitchell Dec 8, 2025
e2b697c
utils support for jsonl
robertandremitchell Dec 9, 2025
5158357
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
JNygaard-Skylight Dec 10, 2025
ce6f902
creating potential new validation pair files
robertandremitchell Dec 22, 2025
427b5f1
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
robertandremitchell Jan 5, 2026
e05e4aa
using sample snippet of data from performance notebook
robertandremitchell Jan 6, 2026
ee1eb87
removing local paths
robertandremitchell Jan 7, 2026
4fb9b65
changing logic to properly track LOINC codes, refactoring evaluation …
robertandremitchell Jan 8, 2026
8385d28
updating sample evaluation file
robertandremitchell Jan 8, 2026
202ace4
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
robertandremitchell Jan 8, 2026
283325e
reorganizing and renaming files for clarity
robertandremitchell Jan 8, 2026
45009ef
updated readme
robertandremitchell Jan 8, 2026
a5b4c54
saving performance notebook updates that use JSONL
robertandremitchell Jan 8, 2026
da06cf8
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
robertandremitchell Jan 12, 2026
b58e763
Rolling back changes to performance notebook, adding new changes accu…
robertandremitchell Jan 14, 2026
bc55c64
adding checks to speed up accuracy analysis, more dynamic in setup
robertandremitchell Jan 15, 2026
1,365 changes: 797 additions & 568 deletions azure_scripts/performance.ipynb

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions data/README.md
@@ -20,6 +20,11 @@ Data created as part of curation, augmentation, or synthetic generation for mode

Data used to evaluate the accuracy of the codes assigned by the TTC model against the expected codes, categorized under `/accuracy_evaluation`.

- `build_eval_files.py` creates the files required to complete the evaluation using the eRSD.
- `build_evaluation_files.py` creates the files required to complete the evaluation using the eRSD.
- `oid_to_conditions.txt` is a JSON file that logs the OID-to-SNOMED condition ID key-value pairs.
- `loinc_to_oids.txt` is a josn file that logs the LOINC code to the array of 1+ OIDs that leverage that LOINC code.
- `loinc_to_oids.txt` is a JSON file that maps each LOINC code to the array of one or more OIDs that use that LOINC code; both mapping files are illustrated in the sketch after this list.
- `add_loinc_codes.py` adds LOINC codes to a JSONL file that only has the display names for the expected and returned text fields. This will probably be deprecated once we add LOINC codes to the embedding files, to avoid thousands of calls to the LOINC API.
- `evaluation.py` is a script that takes a JSON containing the expected and returned LOINC codes and runs a comparison to determine the accuracy of a match. More information on the criteria to determine the degree of correctness of a match can be found here: https://docs.google.com/document/d/1yA5NJ06mf1EfLZRmNrrNKopWL6ExMj-dPYKy8wlVDGs/edit?tab=t.0#heading=h.rn5y5vzcin6p.
- `/accuracy_evaluation/sample_data/` is a folder that contains small portions of data to confirm the efficacy of the `evaluation.py` script.
- `eval_results_snippet.jsonl` is a portion of the output of the `performance.ipynb` notebook that currently lacks the LOINC codes. `eval_results_snippet_with_loinc_codes.txt` is the output created by the `add_loinc_codes.py` script and can be used with the `evaluation.py` script; `evaluation_results_eval_results_snippet_with_loinc_codes.json` is the result.
- `sample_evaluation_file.txt` is a dummy file that can be run against the evaluation script to confirm the logic for a first-, second-, and third-degree match; `evaluation_results_sample_evaluation_file.json` is the output file.
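For orientation, the two mapping files described above are plain JSON despite their `.txt` extension. The sketch below shows their general shape only; every OID, SNOMED condition ID, and LOINC code is an illustrative placeholder, and the real files are generated by `build_evaluation_files.py`.

# illustrative shape of oid_to_conditions.txt: each valueset OID maps to a SNOMED condition ID
oid_to_conditions = {
    "2.16.840.1.113762.1.4.0000.1": "111111111",
    "2.16.840.1.113762.1.4.0000.2": "222222222",
}

# illustrative shape of loinc_to_oids.txt: each LOINC code maps to the array of 1+ OIDs that use it
loinc_to_oids = {
    "10000-1": ["2.16.840.1.113762.1.4.0000.1"],
    "20000-2": ["2.16.840.1.113762.1.4.0000.1", "2.16.840.1.113762.1.4.0000.2"],
}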
87 changes: 87 additions & 0 deletions data/accuracy_evaluation/add_loinc_codes.py
@@ -0,0 +1,87 @@
import json
import os

import requests
from dotenv import load_dotenv

load_dotenv()

# sample run: python data/accuracy_evaluation/add_loinc_codes.py
file_path = "data/accuracy_evaluation/sample_data/eval_results_snippet.jsonl"

# obtain LOINC API credentials from environment variables
USERNAME = os.environ.get("LOINC_USERNAME")
PASSWORD = os.environ.get("LOINC_PASSWORD")

url = "https://loinc.regenstrief.org/searchapi/loincs"


def search_loinc(raw_query: str):
"""
Searches the LOINC database using the provided raw query string.
:param raw_query: The raw query string to search for in the LOINC database.
"""
params = {"query": f'"{raw_query}"'}
response = requests.get(
url,
params=params,
auth=(USERNAME, PASSWORD),
timeout=30,
)
return response.json()

if __name__ == "__main__":
with open(file_path, "r") as f:
raw_eval_data = [json.loads(line) for line in f if line.strip()]

loinc_dict = {}
# build loinc dictionary for caching
for item in raw_eval_data:
if item.get("expected_label") not in loinc_dict.keys():
search = search_loinc(item.get("expected_label"))
loinc_dict[item.get("expected_label")] = (
search.get("Results")[0].get("LOINC_NUM")
if search.get("ResponseSummary").get("RowsReturned") == 1
else None
)
for result in item.get("results"):
if result.get("label") not in loinc_dict.keys():
search = search_loinc(result.get("label"))
loinc_dict[result.get("label")] = (
search.get("Results")[0].get("LOINC_NUM")
if search.get("ResponseSummary").get("RowsReturned") == 1
else None
)

# create evaluation data with LOINC codes added
eval_data = []
for item in raw_eval_data:
grouped_row = {
"example_idx": item.get("example_idx"),
"k-run": item.get("k"),
"raw_text": item.get("query_input"),
"expected_text": item.get("expected_label"),
"expected_loinc": loinc_dict.get(item.get("expected_label")),
"results": [],
}
for result in item.get("results"):
grouped_row["results"].append(
{
"id": str(item.get("example_idx"))
+ "_"
+ str(item.get("k"))
+ "_"
+ str(result.get("rank")),
"rank": result.get("rank"),
"returned_text": result.get("label"),
"returned_loinc": loinc_dict.get(result.get("label")),
}
)
eval_data.append(grouped_row)

export_file = file_path.replace(".jsonl", "_with_loinc_codes.txt")
with open(
export_file,
"w",
) as f:
json.dump(eval_data, f, indent=2)
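For reference, a single record written by this script should look roughly like the sketch below; the field names mirror the grouped_row structure built above, and all values are illustrative placeholders (json.dump writes None as null in the output file).

# illustrative shape of one record in the *_with_loinc_codes.txt output
example_record = {
    "example_idx": 0,
    "k-run": 5,
    "raw_text": "free-text lab order as entered",
    "expected_text": "Example display name for the expected lab test",
    "expected_loinc": "10000-1",
    "results": [
        {"id": "0_5_1", "rank": 1, "returned_text": "Example display name for the expected lab test", "returned_loinc": "10000-1"},
        {"id": "0_5_2", "rank": 2, "returned_text": "Some other lab test display name", "returned_loinc": None},
    ],
}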
data/accuracy_evaluation/{build_eval_files.py → build_evaluation_files.py}
@@ -1,16 +1,16 @@
import json
import os

import requests
from dotenv import load_dotenv
from utils import export_json

load_dotenv()

# sample run: python data/accuracy_evaluation/build_eval_files.py
# sample run: python data/accuracy_evaluation/build_evaluation_files.py
# TES API Key can be obtained at https://tes.tools.aimsplatform.org/
TES_API_KEY = os.environ.get("TES_API_KEY")
TES_BASE_URL = "https://tes.tools.aimsplatform.org/api/fhir/ValueSet/"
headers = {"x-api-key": TES_API_KEY, "Accept": "application/fhir+json"}
TES_HEADERS = {"x-api-key": TES_API_KEY, "Accept": "application/fhir+json"}

# ERSD, key can be obtained at https://ersd.aimsplatform.org/#/api-keys
ERSD_API_KEY = os.environ.get("ERSD_API_KEY")
@@ -49,7 +49,7 @@ def get_tes_valuesets(endpoints: list) -> list:
oids = []
for endpoint in endpoints:
url = f"{TES_BASE_URL}{endpoint}"
response = requests.get(url, headers=headers)
response = requests.get(url, headers=TES_HEADERS)
for resp in response.json().get("compose").get("include"):
oid = resp.get("valueSet")[0].split("/")[-1]
if oid not in oids:
@@ -99,7 +99,7 @@ def build_tes_mapping_files(oids: list) -> tuple[dict, dict]:
loinc_to_oids = {}
for oid in oids:
url = f"{TES_BASE_URL}{oid}"
response = requests.get(url, headers=headers)
response = requests.get(url, headers=TES_HEADERS)
snomed = (
response.json()
.get("useContext")[0]
@@ -151,21 +151,11 @@ def evaluate_mapping_files(oid_to_conditions: dict, loinc_to_oids: dict):
print(k, multiple_conditions[k])


def export_mapping_files(mapping: dict, filename: str):
"""
Exports the mapping dictionary to a JSON file.
:param mapping: Dictionary to save.
:param filename: Output filename.
"""
with open(filename, "w") as f:
json.dump(mapping, f, indent=2)


if __name__ == "__main__":
# Build ERSD mapping files
response, oids = get_ersd_valuesets(VSTYPE_ENDPOINTS)
oid_to_conditions, loinc_to_oids = build_ersd_mapping_files(response, oids)

evaluate_mapping_files(oid_to_conditions, loinc_to_oids)
export_mapping_files(oid_to_conditions, "data/accuracy_evaluation/oid_to_conditions.txt")
export_mapping_files(loinc_to_oids, "data/accuracy_evaluation/loinc_to_oids.txt")
export_json(oid_to_conditions, "data/accuracy_evaluation/oid_to_conditions.txt")
export_json(loinc_to_oids, "data/accuracy_evaluation/loinc_to_oids.txt")
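The export_json and import_json helpers now come from the new utils.py, which this diff does not show. Judging from how they are called here and in evaluation.py, and from the removed export_mapping_files, they are presumably thin wrappers along the lines of the sketch below; treat it as an assumption, not the actual file.

import json


def export_json(data, filename: str):
    """Write data to filename as indented JSON."""
    with open(filename, "w") as f:
        json.dump(data, f, indent=2)


def import_json(filename: str):
    """Read filename and return the parsed JSON object."""
    with open(filename, "r") as f:
        return json.load(f)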
88 changes: 88 additions & 0 deletions data/accuracy_evaluation/evaluation.py
@@ -0,0 +1,88 @@
import os

from utils import export_json, import_json

# sample run: python data/accuracy_evaluation/evaluation.py
# could make this a sys.argv parameter or just update directly as needed, for now hardcoding
# other sample file to determine efficacy: data/accuracy_evaluation/sample_data/evaluation_results_sample_evaluation_file.json
# LOINC codes can be added to a file lacking them using add_loinc_codes.py
input_file = "data/accuracy_evaluation/sample_data/eval_results_snippet_with_loinc_codes.txt"

# Mapping files generated from build_evaluation_files.py
loinc_to_oids_file = "data/accuracy_evaluation/loinc_to_oids.txt"
oid_to_conditions_file = "data/accuracy_evaluation/oid_to_conditions.txt"


def accuracy_evaluation(
loinc_to_oids_file: str, oid_to_conditions_file: str, input_file: str
) -> list[dict]:
"""
Evaluates the accuracy of LOINC to OID mappings against known conditions.
:param loinc_to_oids_file: Path to the LOINC to OIDs mapping file.
:param oid_to_conditions_file: Path to the OID to conditions mapping file.
:param input_file: Path to the input JSON file containing LOINC codes to evaluate.
"""
loinc_to_oids = import_json(loinc_to_oids_file)
oid_to_conditions = import_json(oid_to_conditions_file)
eval_data = import_json(input_file)

status_priority = {
"first-degree match": 5,
"second-degree match, one unique condition": 4,
"third-degree match, one unique condition": 3,
"third-degree match, multiple unique conditions": 2,
"no match": 1,
"no OIDs returned, investigate LOINC validity": 0,
"no conditions returned, investigate OID validity": 0,
"no LOINC returned": 0,
}

for item in eval_data:
expected_loinc = item.get("expected_loinc")
# track the best (highest-priority) status across all results for this item
best_status = "no match"
for result in item.get("results"):
returned_loinc = result.get("returned_loinc")

returned_oids = sorted(loinc_to_oids.get(returned_loinc, []) if returned_loinc else [])
expected_oids = sorted(loinc_to_oids.get(expected_loinc, []) if expected_loinc else [])

returned_conditions = sorted(
list(set(oid_to_conditions.get(oid) for oid in returned_oids))
)
expected_conditions = sorted(
list(set(oid_to_conditions.get(oid) for oid in expected_oids))
)
if returned_loinc is None:
status = "no LOINC returned"
elif returned_loinc == expected_loinc:
status = "first-degree match"
elif returned_oids == expected_oids and len(returned_conditions) == 1:
status = "second-degree match, one unique condition"
elif returned_conditions == expected_conditions and len(returned_conditions) == 1:
status = "third-degree match, one unique condition"
elif returned_conditions == expected_conditions and len(returned_conditions) > 1:
status = "third-degree match, multiple unique conditions"
elif not returned_oids:
status = "no OIDs returned, investigate LOINC validity"
elif not returned_conditions:
status = "no conditions returned, investigate OID validity"
else:
status = "no match"
result["status"] = status

# update the best status for the overall item (i.e., did any of the results achieve a match)
if status_priority.get(status, 0) > status_priority.get(best_status, 0):
best_status = status

if best_status == "first-degree match":
break

item["status"] = best_status

return eval_data


if __name__ == "__main__":
results = accuracy_evaluation(loinc_to_oids_file, oid_to_conditions_file, input_file)
input_file_name = input_file.split("/")[-1].split(".")[0]
export_json(results, os.path.join(os.path.split(input_file)[0], f"evaluation_results_{input_file_name}.json"))
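To make the match hierarchy concrete, the self-contained sketch below restates the per-result branch order on made-up mapping data. The classify helper is a hypothetical condensation of the inner-loop logic above, not part of the script, and all codes are invented.

def classify(expected_loinc, returned_loinc, loinc_to_oids, oid_to_conditions):
    """Condensed restatement of the per-result status logic, for illustration only."""
    returned_oids = sorted(loinc_to_oids.get(returned_loinc, []) if returned_loinc else [])
    expected_oids = sorted(loinc_to_oids.get(expected_loinc, []) if expected_loinc else [])
    returned_conditions = sorted(set(oid_to_conditions.get(oid) for oid in returned_oids))
    expected_conditions = sorted(set(oid_to_conditions.get(oid) for oid in expected_oids))
    if returned_loinc is None:
        return "no LOINC returned"
    if returned_loinc == expected_loinc:
        return "first-degree match"
    if returned_oids == expected_oids and len(returned_conditions) == 1:
        return "second-degree match, one unique condition"
    if returned_conditions == expected_conditions and len(returned_conditions) == 1:
        return "third-degree match, one unique condition"
    if returned_conditions == expected_conditions and len(returned_conditions) > 1:
        return "third-degree match, multiple unique conditions"
    if not returned_oids:
        return "no OIDs returned, investigate LOINC validity"
    if not returned_conditions:
        return "no conditions returned, investigate OID validity"
    return "no match"


# invented mappings: 1111-1 and 2222-2 share an OID; 3333-3 has a different OID but the same condition; 4444-4 maps to a different condition
loinc_to_oids = {"1111-1": ["oid-a"], "2222-2": ["oid-a"], "3333-3": ["oid-b"], "4444-4": ["oid-c"]}
oid_to_conditions = {"oid-a": "condition-x", "oid-b": "condition-x", "oid-c": "condition-y"}

print(classify("1111-1", "1111-1", loinc_to_oids, oid_to_conditions))  # first-degree match (same LOINC)
print(classify("1111-1", "2222-2", loinc_to_oids, oid_to_conditions))  # second-degree match, one unique condition (same OIDs)
print(classify("1111-1", "3333-3", loinc_to_oids, oid_to_conditions))  # third-degree match, one unique condition (same condition via different OIDs)
print(classify("1111-1", "4444-4", loinc_to_oids, oid_to_conditions))  # no match (different condition)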