Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
299aa9a
changing var name per suggestion
robertandremitchell Dec 5, 2025
204c037
Sample accuracy evaluation function
robertandremitchell Dec 5, 2025
111567a
tinkering with second-degree match logic
robertandremitchell Dec 8, 2025
f20d3d3
dynamic file naming, creating utils.py, continued thinking on accura…
robertandremitchell Dec 8, 2025
e2b697c
utils support for jsonl
robertandremitchell Dec 9, 2025
5158357
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
JNygaard-Skylight Dec 10, 2025
ce6f902
creating potential new validation pair files
robertandremitchell Dec 22, 2025
427b5f1
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
robertandremitchell Jan 5, 2026
e05e4aa
using sample snippet of data from performance notebook
robertandremitchell Jan 6, 2026
ee1eb87
removing local paths
robertandremitchell Jan 7, 2026
4fb9b65
changing logic to properly track LOINC codes, refactoring evaluation …
robertandremitchell Jan 8, 2026
8385d28
updating sample evaluation file
robertandremitchell Jan 8, 2026
202ace4
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
robertandremitchell Jan 8, 2026
283325e
reorganizing and renaming files for clarity
robertandremitchell Jan 8, 2026
45009ef
updated readme
robertandremitchell Jan 8, 2026
a5b4c54
saving performance notebook updates that use JSONL
robertandremitchell Jan 8, 2026
da06cf8
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
robertandremitchell Jan 12, 2026
b58e763
Rolling back changes to performance notebook, adding new changes accu…
robertandremitchell Jan 14, 2026
bc55c64
adding checks to speed up accuracy analysis, more dynamic in setup
robertandremitchell Jan 15, 2026
7f8f759
adding RCKMS-specific trigger LOINC code workflow
robertandremitchell Jan 20, 2026
30811ce
Merge branch 'main' into rob/173-create-functions-to-assess-valueset-…
robertandremitchell Jan 22, 2026
ebcb11c
updating output for review
robertandremitchell Jan 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 6 additions & 16 deletions data/accuracy_evaluation/build_eval_files.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import json
import os

import requests
from dotenv import load_dotenv
from utils import export_json

load_dotenv()

# sample run: python data/accuracy_evaluation/build_eval_files.py
# TES API Key can be obtained at https://tes.tools.aimsplatform.org/
TES_API_KEY = os.environ.get("TES_API_KEY")
TES_BASE_URL = "https://tes.tools.aimsplatform.org/api/fhir/ValueSet/"
headers = {"x-api-key": TES_API_KEY, "Accept": "application/fhir+json"}
TES_HEADERS = {"x-api-key": TES_API_KEY, "Accept": "application/fhir+json"}

# ERSD, key can be obtained at https://ersd.aimsplatform.org/#/api-keys
ERSD_API_KEY = os.environ.get("ERSD_API_KEY")
Expand Down Expand Up @@ -49,7 +49,7 @@ def get_tes_valuesets(endpoints: list) -> list:
oids = []
for endpoint in endpoints:
url = f"{TES_BASE_URL}{endpoint}"
response = requests.get(url, headers=headers)
response = requests.get(url, headers=TES_HEADERS)
for resp in response.json().get("compose").get("include"):
oid = resp.get("valueSet")[0].split("/")[-1]
if oid not in oids:
Expand Down Expand Up @@ -99,7 +99,7 @@ def build_tes_mapping_files(oids: list) -> tuple[dict, dict]:
loinc_to_oids = {}
for oid in oids:
url = f"{TES_BASE_URL}{oid}"
response = requests.get(url, headers=headers)
response = requests.get(url, headers=TES_HEADERS)
snomed = (
response.json()
.get("useContext")[0]
Expand Down Expand Up @@ -151,21 +151,11 @@ def evaluate_mapping_files(oid_to_conditions: dict, loinc_to_oids: dict):
print(k, multiple_conditions[k])


def export_mapping_files(mapping: dict, filename: str):
    """
    Write a mapping dictionary to disk as indented JSON.

    :param mapping: Dictionary to serialize.
    :param filename: Path of the file to create or overwrite.
    """
    with open(filename, "w") as out_file:
        # Two-space indentation keeps the exported file easy to read and diff.
        out_file.write(json.dumps(mapping, indent=2))


# Script entry point: fetch the ERSD value sets, build the two mapping
# dictionaries from them, run evaluate_mapping_files on the result, and export
# both mappings under data/accuracy_evaluation/.
if __name__ == "__main__":
# Build ERSD mapping files
response, oids = get_ersd_valuesets(VSTYPE_ENDPOINTS)
oid_to_conditions, loinc_to_oids = build_ersd_mapping_files(response, oids)

evaluate_mapping_files(oid_to_conditions, loinc_to_oids)
# NOTE(review): both the export_mapping_files calls and the export_json calls
# target the same two output paths. This looks like the removed and added lines
# of a diff rendered together; confirm which pair is meant to remain.
export_mapping_files(oid_to_conditions, "data/accuracy_evaluation/oid_to_conditions.txt")
export_mapping_files(loinc_to_oids, "data/accuracy_evaluation/loinc_to_oids.txt")
# export_json is imported from utils; presumably it writes JSON like
# export_mapping_files does (TODO confirm against utils.py).
export_json(oid_to_conditions, "data/accuracy_evaluation/oid_to_conditions.txt")
export_json(loinc_to_oids, "data/accuracy_evaluation/loinc_to_oids.txt")
66 changes: 66 additions & 0 deletions data/accuracy_evaluation/create_eval_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import json
import os

import requests
from dotenv import load_dotenv

load_dotenv()

USERNAME = os.environ.get("LOINC_USERNAME")
PASSWORD = os.environ.get("LOINC_PASSWORD")

url = "https://loinc.regenstrief.org/searchapi/loincs"


def search_loinc(raw_query: str):
    """
    Query the LOINC search API for the given text and return the decoded JSON.

    The text is wrapped in double quotes before being sent as the ``query``
    parameter, and the request authenticates with the module-level
    ``USERNAME`` / ``PASSWORD`` credentials.

    :param raw_query: The raw query string to search for in the LOINC database.
    :return: The JSON body of the response, decoded into Python objects.
    :raises requests.HTTPError: If the API answers with a 4xx/5xx status
        (for example, when the credentials are missing or rejected).
    """
    # Quoting the text presumably asks the API for a phrase match rather than
    # matching each word independently (TODO confirm against the LOINC API docs).
    params = {"query": f'"{raw_query}"'}
    response = requests.get(
        url,
        params=params,
        auth=(USERNAME, PASSWORD),
        timeout=30,
    )
    # Surface HTTP failures here instead of letting an error payload (or a
    # non-JSON body) be treated as a search result by the caller.
    response.raise_for_status()
    return response.json()


# Resolve the input file next to this script instead of through a
# machine-specific absolute path, so the script works from any checkout.
file_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "eval_results_snippet_with_codes.jsonl",
)
# The input is JSON Lines: one JSON object per non-blank line.
with open(file_path, "r", encoding="utf-8") as f:
    raw_eval_data = [json.loads(line) for line in f if line.strip()]

def _lookup_loinc(label):
    """Return the LOINC code for ``label`` if the search yields exactly one row, else None."""
    if not label:
        # Nothing to search for; avoid sending a literal "None" query.
        return None
    result = search_loinc(label)
    summary = result.get("ResponseSummary") or {}
    rows = result.get("Results") or []
    # Only an unambiguous (single-row) match is treated as the label's code.
    if summary.get("RowsReturned") == 1 and rows:
        return rows[0].get("LOINC_NUM")
    return None


# Build one evaluation record per input row, resolving both the expected and
# the top predicted label to a LOINC code where the search is unambiguous.
eval_data = []
for item in raw_eval_data:
    expected_text = item.get("expected_label")
    # Tolerate rows whose `top_predicted` entry is missing or null.
    returned_text = (item.get("top_predicted") or {}).get("label")
    eval_data.append(
        {
            "id": str(item.get("example_idx")) + "_" + str(item.get("k")),
            "raw_text": item.get("query_input"),
            "expected_text": expected_text,
            "returned_text": returned_text,
            "expected_loinc": _lookup_loinc(expected_text),
            "returned_loinc": _lookup_loinc(returned_text),
        }
    )

# pop the rows for now that have no loinc for sake of testing
eval_data = [
    item
    for item in eval_data
    if item.get("expected_loinc") is not None and item.get("returned_loinc") is not None
]

# Write the results next to this script instead of to a machine-specific
# absolute path, so the script works from any checkout.
output_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "eval_results_snippet_with_codes.txt",
)
# The file has a .txt extension but holds a single indented JSON array.
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(eval_data, f, indent=2)
Loading
Loading