Skip to content

Commit dafe99d

Browse files
authored
Merge pull request #18 from soda-inria/improve-brier-score-testing
Improve Brier score testing
2 parents 082e83d + 8417fef commit dafe99d

File tree

3 files changed

+468
-51
lines changed

3 files changed

+468
-51
lines changed

hazardous/_gradient_boosting_incidence.py

+20-9
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ class GradientBoostingIncidence(BaseEstimator, ClassifierMixin):
111111
112112
Estimate a cause-specific CIF by iteratively minimizing a stochastic time
113113
integrated proper scoring rule (Brier score or binary cross-entropy) for
114-
the kth cause of failure from _[1].
114+
the kth cause of failure as defined in equation 14 in [Kretowska2018]_.
115115
116116
Under the hood, this class uses randomly sampled reference time horizons
117117
concatenated as an extra input column to the underlying HGB binary
@@ -150,12 +150,12 @@ class GradientBoostingIncidence(BaseEstimator, ClassifierMixin):
150150
151151
time_horizon : float or int, default=None
152152
A specific time horizon `t_horizon` to treat the model as a
153-
probabilistic classifier to estimate `E[T_k < t_horizon|X]` where `T_k`
154-
is a random variable representing the (uncensored) event for the type
155-
of interest.
153+
probabilistic classifier to estimate ``𝔼[T < t_horizon, E = k|X]`` where
154+
``T`` is a random variable representing the (uncensored) event and ``E``
155+
a random categorical variable representing the (uncensored) event type.
156156
157-
When specified, the `predict_proba` method returns an estimate of
158-
`E[T_k < t_horizon|X]` for each provided realisation of `X`.
157+
When specified, the ``predict_proba`` method returns an estimate of
158+
``𝔼[T < t_horizon, E = k|X]`` for each provided realization of ``X``.
159159
160160
monotonic_incidence : str or False, default=False
161161
Whether to constrain the CIF to be monotonic with respect to time.
@@ -178,9 +178,20 @@ class GradientBoostingIncidence(BaseEstimator, ClassifierMixin):
178178
References
179179
----------
180180
181-
.. [1] M. Kretowska, Tree-based models for survival data with competing
182-
risks, Computer Methods and Programs in Biomedicine 159 (2018)
183-
185-198.
181+
.. [Graf1999] E. Graf, C. Schmoor, W. Sauerbrei, M. Schumacher, "Assessment
182+
and comparison of prognostic classification schemes for survival data",
183+
1999
184+
185+
.. [Kretowska2018] M. Kretowska, "Tree-based models for survival data with
186+
competing risks", 2018
187+
188+
.. [Gerds2006] T. Gerds and M. Schumacher, "Consistent Estimation of the
189+
Expected Brier Score in General Survival Models with Right-Censored
190+
Event Times", 2006
191+
192+
.. [Edwards2016] J. Edwards, L. Hester, M. Gokhale, C. Lesko,
193+
"Methodologic Issues When Estimating Risks in Pharmacoepidemiology.",
194+
2016, doi:10.1007/s40471-016-0089-1
184195
"""
185196

186197
def __init__(

hazardous/metrics/_brier_score.py

+113-41
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,16 @@ def brier_score_incidence(self, y_true, y_pred, times):
136136
else:
137137
event_true = event_true > 0
138138

139+
if y_pred.ndim != 2:
140+
raise ValueError(
141+
"'y_pred' must be a 2D array with shape (n_samples, n_times), got"
142+
f" shape {y_pred.shape}."
143+
)
144+
if y_pred.shape[0] != event_true.shape[0]:
145+
raise ValueError(
146+
"'y_true' and 'y_pred' must have the same number of samples, "
147+
f"got {event_true.shape[0]} and {y_pred.shape[0]} respectively."
148+
)
139149
if y_pred.shape[1] != times.shape[0]:
140150
raise ValueError(
141151
f"'times' length ({times.shape[0]}) "
@@ -193,44 +203,37 @@ def _weighted_binary_targets(self, y_event, y_duration, times, ipcw_y_duration):
193203
else:
194204
k = self.event_of_interest
195205

196-
# Specify the binary classification target for each record in y and
197-
# a reference time horizon:
206+
# Specify the binary classification target for each record in y and a
207+
# reference time horizon:
198208
#
199209
# - 1 when event of interest was observed before the reference time
200210
# horizon,
201211
#
202-
# - 0 otherwise: any other event happening at any time, censored record
203-
# or event of interest happening after the reference time horizon.
212+
# - 0 otherwise: any competing event happening at any time, censored
213+
# record or event of interest happening after the reference time
214+
# horizon.
204215
#
205216
# Note: censored events only contribute (as negative target) when
206217
# their duration is larger than the reference target horizon.
207218
# Otherwise, they are discarded by setting their weight to 0 in the
208219
# following.
209-
210-
y_binary = np.zeros(y_event.shape[0], dtype=np.int32)
211-
y_binary[(y_event == k) & (y_duration <= times)] = 1
212-
213-
# Compute the weights for each term contributing to the Brier score
214-
# at the specified time horizons.
215-
#
216-
# - error of a prediction for a time horizon before the occurence of an
217-
# event (either censored or uncensored) is weighted by the inverse
218-
# probability of censoring at that time horizon.
219220
#
220-
# - error of a prediction for a time horizon after the any observed event
221-
# is weighted by inverse censoring probability at the actual time
222-
# of the observed event.
221+
# Contrary to censored records, competing events always contribute as
222+
# negative targets. There weight is always non-zero but differ if
223+
# they happen either before or after the reference time horizon.
223224
#
224-
# - "error" of a prediction for a time horizon after a censored event has
225-
# 0 weight and do not contribute to the Brier score computation.
225+
# This IPCW scheme for survival analysis (binary events) is described
226+
# in [Graf1999] and is extended to multiple competing events in
227+
# [Kretowska2018].
228+
event_k_before_horizon = (y_event == k) & (y_duration <= times)
229+
y_binary = event_k_before_horizon.astype(np.int32)
226230

227-
# Estimate the probability of censoring at current time point t.
228231
ipcw_times = self.ipcw_est.compute_ipcw_at(times)
229-
before = times < y_duration
230-
weights = np.where(before, ipcw_times, 0)
232+
any_event_or_censoring_after_horizon = y_duration > times
233+
weights = np.where(any_event_or_censoring_after_horizon, ipcw_times, 0)
231234

232-
after_any_observed_event = (y_event > 0) & (y_duration <= times)
233-
weights = np.where(after_any_observed_event, ipcw_y_duration, weights)
235+
any_observed_event_before_horizon = (y_event > 0) & (y_duration <= times)
236+
weights = np.where(any_observed_event_before_horizon, ipcw_y_duration, weights)
234237

235238
return y_binary, weights
236239

@@ -257,6 +260,11 @@ def brier_score_survival(
257260
:math:`t`, estimated on the training set by the Kaplan-Meier estimator on the
258261
negation of the binary any-event indicator.
259262
263+
Note that this assumes independence between censoring and the covariates.
264+
When this assumption is violated, the IPCW weights are biased and the Brier
265+
score is not a proper scoring rule anymore. See [Gerds2006]_ for a study of this
266+
bias.
267+
260268
Parameters
261269
----------
262270
y_train : record-array, dictionnary or dataframe of shape (n_samples, 2)
@@ -287,6 +295,16 @@ def brier_score_survival(
287295
Returns
288296
-------
289297
brier_score : np.ndarray of shape (n_times)
298+
299+
References
300+
----------
301+
.. [Graf1999] E. Graf, C. Schmoor, W. Sauerbrei, M. Schumacher, "Assessment
302+
and comparison of prognostic classification schemes for survival data",
303+
1999
304+
305+
.. [Gerds2006] T. Gerds and M. Schumacher, "Consistent Estimation of the
306+
Expected Brier Score in General Survival Models with Right-Censored
307+
Event Times", 2006
290308
"""
291309
computer = IncidenceScoreComputer(
292310
y_train,
@@ -308,6 +326,11 @@ def integrated_brier_score_survival(
308326
\mathrm{IBS} = \frac{1}{t_{max} - t_{min}} \int^{t_{max}}_{t_{min}}
309327
\mathrm{BS}(u) du
310328
329+
Note that this assumes independence between censoring and the covariates.
330+
When this assumption is violated, the IPCW weights are biased and the Brier
331+
score is not a proper scoring rule anymore. See [Gerds2006]_ for a study of
332+
this bias.
333+
311334
Parameters
312335
----------
313336
y_train : record-array, dictionnary or dataframe of shape (n_samples, 2)
@@ -338,6 +361,16 @@ def integrated_brier_score_survival(
338361
Returns
339362
-------
340363
ibs : float
364+
365+
References
366+
----------
367+
.. [Graf1999] E. Graf, C. Schmoor, W. Sauerbrei, M. Schumacher, "Assessment
368+
and comparison of prognostic classification schemes for survival data",
369+
1999
370+
371+
.. [Gerds2006] T. Gerds and M. Schumacher, "Consistent Estimation of the
372+
Expected Brier Score in General Survival Models with Right-Censored
373+
Event Times", 2006
341374
"""
342375
computer = IncidenceScoreComputer(
343376
y_train,
@@ -360,32 +393,45 @@ def brier_score_incidence(
360393
\mathrm{BS}_k(t) = \frac{1}{n} \sum_{i=1}^n \hat{\omega}_i(t)
361394
(\mathbb{I}(t_i \leq t, \delta_i = k) - \hat{F}_k(t|\mathbf{x}_i))^2
362395
363-
where :math:`\hat{F}_k(t | \mathbf{x}_i)` is the estimate of the cumulative
364-
incidence for the kth event up to time point :math:`t` for a feature vector
365-
:math:`\mathbf{x}_i`, and
396+
where :math:`\hat{F}_k(t | \mathbf{x}_i)` is an estimate of the
397+
(uncensored) cumulative incidence for the kth event up to time point
398+
:math:`t` for a feature vector :math:`\mathbf{x}_i` [Edwards2016]_:
366399
367400
.. math::
368401
369-
\hat{\omega}_i(t)=\mathbb{I}(t_i \leq t, \delta_i \neq 0)/\hat{G}(t_i)
370-
+ \mathbb{I}(t_i > t)/\hat{G}(t)
402+
\hat{F}_k(t | \mathbf{x}_i) \approx P(T_i \leq t, E_i = k |
403+
\mathbf{x}_i)
371404
372-
are IPCW weigths based on the Kaplan-Meier estimate of the censoring
373-
distribution :math:`\hat{G}(t)`.
405+
and :math:`\hat{\omega}_i(t)` are IPCW weigths based on the Kaplan-Meier
406+
estimate of the censoring distribution :math:`\hat{G}(t)`:
407+
408+
.. math::
409+
410+
\hat{\omega}_i(t)=\frac{\mathbb{I}(t_i \leq t, \delta_i \neq
411+
0)}{\hat{G}(t_i)} + \frac{\mathbb{I}(t_i > t)}{\hat{G}(t)}
412+
413+
This scheme was introduced in [Graf1999]_ in the context of survival
414+
analysis and extended to competing events in [Kretowska2018]_.
415+
416+
Note that this assumes independence between censoring and the covariates.
417+
When this assumption is violated, the IPCW weights are biased and the Brier
418+
score is not a proper scoring rule anymore. See [Gerds2006]_ for a study of
419+
this bias.
374420
375421
Parameters
376422
----------
377423
y_train : record-array, dictionnary or dataframe of shape (n_samples, 2)
378-
The target, consisting in the 'event' and 'duration' columns.
379-
This is used to fit the IPCW estimator.
424+
The target, consisting in the 'event' and 'duration' columns. This is
425+
used to fit the IPCW estimator.
380426
381427
y_test : record-array, dictionnary or dataframe of shape (n_samples, 2)
382-
The ground truth, consisting in the 'event' and 'duration' columns.
383-
In the "event" column, `0` indicates censoring, and any other values
428+
The ground truth, consisting in the 'event' and 'duration' columns. In
429+
the "event" column, `0` indicates censoring, and any other values
384430
indicate competing event types.
385431
386432
y_pred : array-like of shape (n_samples, n_times)
387-
Incidence probability estimates predicted at ``times``.
388-
In the binary event settings, this is 1 - survival_probability.
433+
Incidence probability estimates predicted at ``times``. In the binary
434+
event settings, this is 1 - survival_probability.
389435
390436
times : array-like of shape (n_times)
391437
Times at which the survival probability ``y_pred`` has been estimated
@@ -411,9 +457,20 @@ def brier_score_incidence(
411457
412458
References
413459
----------
460+
.. [Graf1999] E. Graf, C. Schmoor, W. Sauerbrei, M. Schumacher, "Assessment
461+
and comparison of prognostic classification schemes for survival data",
462+
1999
463+
464+
.. [Kretowska2018] M. Kretowska, "Tree-based models for survival data with
465+
competing risks", 2018
414466
415-
[1] M. Kretowska, "Tree-based models for survival data with competing risks",
416-
Computer Methods and Programs in Biomedicine 159 (2018) 185-198.
467+
.. [Gerds2006] T. Gerds and M. Schumacher, "Consistent Estimation of the
468+
Expected Brier Score in General Survival Models with Right-Censored
469+
Event Times", 2006
470+
471+
.. [Edwards2016] J. Edwards, L. Hester, M. Gokhale, C. Lesko,
472+
"Methodologic Issues When Estimating Risks in Pharmacoepidemiology.",
473+
2016, doi:10.1007/s40471-016-0089-1
417474
"""
418475
# XXX: make times an optional kwarg to be compatible with
419476
# sksurv.metrics.brier_score?
@@ -442,6 +499,14 @@ def integrated_brier_score_incidence(
442499
\mathrm{IBS}_k = \frac{1}{t_{max} - t_{min}} \int^{t_{max}}_{t_{min}}
443500
\mathrm{BS}_k(u) du
444501
502+
This scheme was introduced in [Graf1999]_ for survival analysis and
503+
extended to competing events in [Kretowska2018]_.
504+
505+
Note that this assumes independence between censoring and the covariates.
506+
When this assumption is violated, the IPCW weights are biased and the Brier
507+
score is not a proper scoring rule anymore. See [Gerds2006]_ for a study of
508+
this bias.
509+
445510
Parameters
446511
----------
447512
y_train : record-array, dictionnary or dataframe of shape (n_samples, 2)
@@ -480,9 +545,16 @@ def integrated_brier_score_incidence(
480545
481546
References
482547
----------
548+
.. [Graf1999] E. Graf, C. Schmoor, W. Sauerbrei, M. Schumacher, "Assessment
549+
and comparison of prognostic classification schemes for survival data",
550+
1999
551+
552+
.. [Kretowska2018] M. Kretowska, "Tree-based models for survival data with
553+
competing risks", 2018
483554
484-
[1] M. Kretowska, "Tree-based models for survival data with competing risks",
485-
Computer Methods and Programs in Biomedicine 159 (2018) 185-198.
555+
.. [Gerds2006] T. Gerds and M. Schumacher, "Consistent Estimation of the
556+
Expected Brier Score in General Survival Models with Right-Censored
557+
Event Times", 2006
486558
"""
487559
computer = IncidenceScoreComputer(
488560
y_train,

0 commit comments

Comments
 (0)