Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit e15867d

Browse files
committedDec 17, 2024·
initial version of icd analysis
1 parent 2f74878 commit e15867d

File tree

1 file changed

+301
-0
lines changed

1 file changed

+301
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
"""
2+
.. _icd_val_results:
3+
4+
Performance of the initial contact algorithms on the TVS dataset
5+
==============================================================
6+
7+
.. warning:: On this page you will find preliminary results for a standardized revalidation of the pipeline and all
8+
of its algorithm.
9+
The current state, **TECHNICAL EXPERIMENTATION**.
10+
Don't use these results or make any assumptions based on them.
11+
We will update this page incrementally and provide further information, as soon as the state of any of the validation
12+
steps changes.
13+
14+
The following provides an analysis and comparison of the icd performance on the TVS dataset (lab and free-living).
15+
We look into the actual performance of the algorithms compared to the reference data and compare these results with
16+
the performance of the original matlab algorithm.
17+
18+
.. note:: If you are interested in how these results are calculated, head over to the
19+
:ref:`processing page <icd_val_gen>`.
20+
21+
We focus on the `single_results` (aka the performance per trail) and will aggregate it over multiple levels.
22+
23+
"""
24+
25+
# %%
# Below are the list of algorithms that we will compare.
# Note, that we use the prefix "new" to refer to the reimplemented python algorithms and "orig" to refer to the
# original matlab algorithms.
algorithms = {
    name: (name, "new")
    for name in ("IcdIonescu", "IcdShinImproved", "IcdHKLeeImproved")
}
# We only load the matlab algorithms that were also reimplemented
algorithms["matlab_Ani_McCamley"] = ("Ani_McCamley", "orig")
40+
41+
# %%
# The code below loads the data and prepares it for the analysis.
# By default, the data will be downloaded from an online repository (and cached locally).
# If you want to use a local copy of the data, you can set the `MOBGAP_VALIDATION_DATA_PATH` environment variable
# and the `MOBGAP_VALIDATION_USE_LOCAL_DATA` to `1`.
#
# The file download will print a couple log information, which can usually be ignored.
# You can also change the `version` parameter to load a different version of the data.
from pathlib import Path

import pandas as pd
from mobgap.data.validation_results import ValidationResultLoader
from mobgap.utils.misc import get_env_var

# Only use the local results copy when explicitly enabled; otherwise `None`
# makes the loader fall back to the online repository (with local caching).
local_data_path = (
    Path(get_env_var("MOBGAP_VALIDATION_DATA_PATH")) / "results"
    if int(get_env_var("MOBGAP_VALIDATION_USE_LOCAL_DATA", 0))
    else None
)
loader = ValidationResultLoader(
    "icd", result_path=local_data_path, version="main"
)
63+
64+
65+
# Index levels that identify a single free-living recording in the results.
free_living_index_cols = [
    "cohort",
    "participant_id",
    "time_measure",
    "recording",
    "recording_name",
    "recording_name_pretty",
]

# Load the per-recording results of every algorithm and stack them into a
# single frame, keyed by (algo, version) on top of the recording index.
results = pd.concat(
    {
        version_label: loader.load_single_results(algo_key, "free_living")
        for algo_key, version_label in algorithms.items()
    },
    names=["algo", "version", *free_living_index_cols],
)
# Long format for plotting: one column combining algo and version, plus a
# constant "combined" column used to draw an "all cohorts" box in the plots.
results_long = results.reset_index().assign(
    algo_with_version=lambda df: df["algo"] + " (" + df["version"] + ")",
    _combined="combined",
)
cohort_order = ["HA", "CHF", "COPD", "MS", "PD", "PFF"]
84+
# %%
# Performance metrics
# -------------------
# For each participant, performance metrics were calculated by classifying each sample in the recording as either
# TP, FP, or FN.
# Based on these values recall (sensitivity), precision (positive predictive value), F1 score were calculated.
# On top of that the duration of overall detected initial contact per participant was calculated.
# From this we calculate the mean and confidence interval for both systems, the bias and limits of agreement (LoA)
# between the algorithm output and the reference data, the absolute error and the ICC.
#
# Below the functions that calculate these metrics are defined.
from functools import partial

from mobgap.pipeline.evaluation import CustomErrorAggregations as A
from mobgap.utils.df_operations import (
    CustomOperation,
    apply_aggregations,
    apply_transformations,
)
from mobgap.utils.tables import FormatTransformer as F

# Aggregations applied per group: number of datapoints, mean + CI for the
# classification metrics, and mean + limits of agreement for the errors.
custom_aggs = [
    CustomOperation(
        identifier=None,
        function=A.n_datapoints,
        column_name=[("n_datapoints", "all")],
    ),
    *(
        (metric, ["mean", A.conf_intervals])
        for metric in ("recall", "precision", "f1_score")
    ),
    *(
        (metric, ["mean", A.loa])
        for metric in ("ic_absolute_error_s", "ic_relative_error")
    ),
]
117+
118+
def _value_with_range_op(metric, range_kind, group):
    """Build a CustomOperation rendering ``mean (range)`` for one metric column."""
    return CustomOperation(
        identifier=None,
        function=partial(
            F.value_with_range,
            value_col=("mean", metric),
            range_col=(range_kind, metric),
        ),
        column_name=(group, metric),
    )


# Transformations that turn the raw aggregation output into display strings.
format_transforms = [
    CustomOperation(
        identifier=None,
        function=lambda df_: df_[("n_datapoints", "all")].astype(int),
        column_name=("General", "n_datapoints"),
    ),
    *(
        _value_with_range_op(metric, "conf_intervals", "ICD")
        for metric in ("recall", "precision", "f1_score")
    ),
    *(
        _value_with_range_op(metric, "loa", "IC Duration")
        for metric in ("ic_absolute_error_s", "ic_relative_error")
    ),
]
156+
157+
# Human-readable column names for the final result tables.
final_names = dict(
    n_datapoints="# recordings",
    recall="Recall",
    precision="Precision",
    f1_score="F1 Score",
    ic_absolute_error_s="Abs. Error [s]",
    ic_relative_error="Bias and LoA",
)
165+
166+
167+
def format_results(df: pd.DataFrame) -> pd.DataFrame:
    """Format aggregated metrics for display.

    Applies the ``format_transforms``, renames the metric columns to their
    human-readable names, and keeps only the renamed columns (in the order
    defined by ``final_names``).
    """
    formatted = apply_transformations(df, format_transforms)
    renamed = formatted.rename(columns=final_names)
    return renamed.loc[:, pd.IndexSlice[:, list(final_names.values())]]
173+
174+
175+
# %%
# Free-Living Comparison
# ----------------------
# We focus the comparison on the free-living data, as this is the most relevant considering our final use-case.
# In the free-living data, there is one 2.5 hour recording per participant.
# This means, each datapoint in the plots below and in the summary statistics represents one participant.
#
# All results across all cohorts
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import matplotlib.pyplot as plt
import seaborn as sns

# Plot the original matlab version before the reimplementation in each pair.
hue_order = ["orig", "new"]

fig, ax = plt.subplots()
sns.boxplot(
    data=results_long,
    x="algo",
    y="f1_score",
    hue="version",
    hue_order=hue_order,
    ax=ax,
)
fig.show()

# Summary table over all cohorts, one row per (algo, version).
perf_metrics_all = format_results(
    results.groupby(["algo", "version"]).apply(apply_aggregations, custom_aggs)
)
perf_metrics_all
206+
207+
# %%
# Per Cohort
# ~~~~~~~~~~
# While this provides a good overview, it does not fully reflect how these algorithms perform on the different cohorts.
fig, ax = plt.subplots()
sns.boxplot(
    data=results_long,
    x="cohort",
    y="f1_score",
    hue="algo_with_version",
    ax=ax,
)
fig.show()

# Summary table per cohort, reordered to the canonical cohort order.
perf_metrics_per_cohort = format_results(
    results.groupby(["cohort", "algo", "version"]).apply(
        apply_aggregations, custom_aggs
    )
).loc[cohort_order]
perf_metrics_per_cohort
224+
225+
# %%
# Per relevant cohort
# ~~~~~~~~~~~~~~~~~~~
# Overview over all cohorts is good, but this is not how the icd algorithms are used in our main pipeline.
# Here, the HA, CHF, and COPD cohort use the ``IcdIonescu`` algorithm, while the ``IcdShinImproved`` algorithm is used
# for the MS, PD, PFF cohorts. # TODO: Check if this is the case
# Let's look at the performance of these algorithms on the respective cohorts.
from mobgap.pipeline import MobilisedPipelineHealthy, MobilisedPipelineImpaired

low_impairment_algo = "IcdIonescu"
low_impairment_cohorts = list(MobilisedPipelineHealthy().recommended_cohorts)

low_impairment_results = results_long[
    results_long["cohort"].isin(low_impairment_cohorts)
].query("algo == @low_impairment_algo")

fig, ax = plt.subplots()
sns.boxplot(
    data=low_impairment_results,
    x="cohort",
    y="f1_score",
    hue="version",
    hue_order=hue_order,
    ax=ax,
)
# Second boxplot adds an "all cohorts combined" column (via the constant
# `_combined` column) next to the per-cohort boxes on the same axes.
sns.boxplot(
    data=low_impairment_results,
    x="_combined",
    y="f1_score",
    hue="version",
    hue_order=hue_order,
    legend=False,
    ax=ax,
)
fig.suptitle(f"Low Impairment Cohorts ({low_impairment_algo})")
fig.show()

# %%
perf_metrics_per_cohort.loc[
    pd.IndexSlice[low_impairment_cohorts, low_impairment_algo], :
].reset_index("algo", drop=True)
266+
267+
# %%
high_impairment_algo = "IcdShinImproved"
high_impairment_cohorts = list(MobilisedPipelineImpaired().recommended_cohorts)

high_impairment_results = results_long[
    results_long["cohort"].isin(high_impairment_cohorts)
].query("algo == @high_impairment_algo")

# NOTE: a redundant ``hue_order = ["orig", "new"]`` re-assignment was removed
# here; the identical value is already defined in the free-living section above.
fig, ax = plt.subplots()
sns.boxplot(
    data=high_impairment_results,
    x="cohort",
    y="f1_score",
    hue="version",
    hue_order=hue_order,
    ax=ax,
)
# Second boxplot adds an "all cohorts combined" column (via the constant
# `_combined` column) next to the per-cohort boxes on the same axes.
sns.boxplot(
    data=high_impairment_results,
    x="_combined",
    y="f1_score",
    hue="version",
    hue_order=hue_order,
    legend=False,
    ax=ax,
)
fig.suptitle(f"High Impairment Cohorts ({high_impairment_algo})")
fig.show()

# %%
perf_metrics_per_cohort.loc[
    pd.IndexSlice[high_impairment_cohorts, high_impairment_algo], :
].reset_index("algo", drop=True)

0 commit comments

Comments
 (0)