1
1
from __future__ import annotations
2
- from typing import Optional , Union , Dict , Tuple
2
+ from typing import Optional , Union , Dict , Tuple , Type
3
3
4
4
import numpy as np
5
5
import pytest
8
8
from sklearn .datasets import make_regression
9
9
from sklearn .linear_model import LinearRegression
10
10
from sklearn .ensemble import RandomForestRegressor
11
+ from sklearn .linear_model import QuantileRegressor
12
+ from lightgbm import LGBMRegressor
11
13
12
14
from mapie .subsample import Subsample
13
15
from mapie ._typing import ArrayLike
14
16
from mapie .conformity_scores import GammaConformityScore , \
15
17
AbsoluteConformityScore
16
18
from mapie_v1 .regression import SplitConformalRegressor , \
17
19
CrossConformalRegressor , \
18
- JackknifeAfterBootstrapRegressor
20
+ JackknifeAfterBootstrapRegressor , \
21
+ ConformalizedQuantileRegressor
19
22
20
23
from mapiev0 .regression import MapieRegressor as MapieRegressorV0 # noqa
24
+ from mapiev0 .regression import MapieQuantileRegressor as MapieQuantileRegressorV0 # noqa
21
25
from mapie_v1 .conformity_scores ._utils import \
22
26
check_and_select_regression_conformity_score
23
27
from mapie_v1 .integration_tests .utils import (filter_params ,
30
34
31
35
32
36
# Synthetic regression data shared by the test cases in this module.
X, y_signed = make_regression(
    n_samples=100,
    n_features=10,
    noise=1.0,
    random_state=RANDOM_STATE,
)
# Absolute value of the generated targets, so every target is >= 0.
y = np.abs(y_signed)
# One random weight per sample, seeded with RANDOM_STATE.
sample_weight = RandomState(RANDOM_STATE).random(len(X))
# Five consecutive runs of 20 labels each (0, 1, 2, 3, 4): 100 labels total,
# one per generated sample.
groups = [label for label in range(5) for _ in range(20)]
41
45
positive_predictor = TransformedTargetRegressor (
42
46
regressor = LinearRegression (),
43
47
func = lambda y_ : np .log (y_ + 1 ),
@@ -105,9 +109,9 @@ def test_intervals_and_predictions_exact_equality_split(
105
109
"random_state" : RANDOM_STATE ,
106
110
}
107
111
108
- v0 , v1 = initialize_models (cv , v0_params , v1_params )
109
- compare_model_predictions_and_intervals (v0 = v0 ,
110
- v1 = v1 ,
112
+ v0 , v1 = select_models_by_strategy (cv )
113
+ compare_model_predictions_and_intervals (model_v0 = v0 ,
114
+ model_v1 = v1 ,
111
115
X = X_split ,
112
116
y = y_split ,
113
117
v0_params = v0_params ,
@@ -184,7 +188,7 @@ def test_intervals_and_predictions_exact_equality_cross(params_cross):
184
188
v0_params = params_cross ["v0" ]
185
189
v1_params = params_cross ["v1" ]
186
190
187
- v0 , v1 = initialize_models ("cross" , v0_params , v1_params )
191
+ v0 , v1 = select_models_by_strategy ("cross" )
188
192
compare_model_predictions_and_intervals (v0 , v1 , X , y , v0_params , v1_params )
189
193
190
194
@@ -264,58 +268,178 @@ def test_intervals_and_predictions_exact_equality_cross(params_cross):
264
268
]
265
269
266
270
271
# scikit-learn quantile regressor reused by the quantile "split" test case.
split_model = QuantileRegressor(solver="highs-ds", alpha=0.0)

# Three LightGBM quantile regressors, at levels alpha/2, 1 - alpha/2 and 0.5
# (presumably lower bound, upper bound and median -- confirm against the
# estimator order expected by the quantile regressors under test).
# Built with a comprehension so no loop temporaries are left behind as
# module-level names.
lgbm_alpha = 0.1
lgbm_models = [
    LGBMRegressor(objective='quantile', alpha=quantile_level)
    for quantile_level in (lgbm_alpha / 2, 1 - lgbm_alpha / 2, 0.5)
]
284
+
285
+
267
286
@pytest.mark.parametrize("params_jackknife", params_test_cases_jackknife)
def test_intervals_and_predictions_exact_equality_jackknife(params_jackknife):
    """Run the v0/v1 comparison for one jackknife parameter set.

    ``params_jackknife`` is a dict holding the v0 keyword arguments under
    ``"v0"`` and the v1 keyword arguments under ``"v1"``.
    """
    params_for_v0 = params_jackknife["v0"]
    params_for_v1 = params_jackknife["v1"]

    # The helper returns the model classes; they are instantiated inside
    # compare_model_predictions_and_intervals.
    model_v0, model_v1 = select_models_by_strategy("jackknife")
    compare_model_predictions_and_intervals(
        model_v0,
        model_v1,
        X,
        y,
        params_for_v0,
        params_for_v1,
    )
274
293
275
294
276
- def initialize_models (
277
- strategy_key : str ,
278
- v0_params : Dict ,
279
- v1_params : Dict ,
280
- ) -> Tuple [MapieRegressorV0 , Union [
281
- SplitConformalRegressor ,
282
- CrossConformalRegressor ,
283
- JackknifeAfterBootstrapRegressor
284
- ]]:
295
# Parameter sets for the quantile comparison test. Each entry pairs the
# keyword arguments of the v0 model ("v0") with those of the v1 model ("v1").
params_test_cases_quantile = [
    # Case 1: split strategy with sample weights, 80% confidence level.
    {
        "v0": {
            "alpha": 0.2,
            "cv": "split",
            "method": "quantile",
            "calib_size": 0.3,
            "sample_weight": sample_weight,
            "random_state": RANDOM_STATE,
        },
        "v1": {
            "confidence_level": 0.8,
            "prefit": False,
            "test_size": 0.3,
            "fit_params": {"sample_weight": sample_weight},
            "random_state": RANDOM_STATE,
        },
    },
    # Case 2: prefit LightGBM quantile models with sample weights;
    # v0 "optimize_beta" is paired with v1 "minimize_interval_width".
    {
        "v0": {
            "estimator": lgbm_models,
            "alpha": lgbm_alpha,
            "cv": "prefit",
            "method": "quantile",
            "calib_size": 0.3,
            "sample_weight": sample_weight,
            "optimize_beta": True,
            "random_state": RANDOM_STATE,
        },
        "v1": {
            "estimator": lgbm_models,
            "confidence_level": 1 - lgbm_alpha,
            "prefit": True,
            "test_size": 0.3,
            "fit_params": {"sample_weight": sample_weight},
            "minimize_interval_width": True,
            "random_state": RANDOM_STATE,
        },
    },
    # Case 3: split strategy with the scikit-learn QuantileRegressor,
    # 50% confidence level, infinite bounds allowed.
    {
        "v0": {
            "estimator": split_model,
            "alpha": 0.5,
            "cv": "split",
            "method": "quantile",
            "calib_size": 0.3,
            "allow_infinite_bounds": True,
            "random_state": RANDOM_STATE,
        },
        "v1": {
            "estimator": split_model,
            "confidence_level": 0.5,
            "prefit": False,
            "test_size": 0.3,
            "allow_infinite_bounds": True,
            "random_state": RANDOM_STATE,
        },
    },
    # Case 4: split strategy, 90% confidence level;
    # v0 "symmetry" is paired with v1 "symmetric_intervals".
    {
        "v0": {
            "alpha": 0.1,
            "cv": "split",
            "method": "quantile",
            "calib_size": 0.3,
            "random_state": RANDOM_STATE,
            "symmetry": False,
        },
        "v1": {
            "confidence_level": 0.9,
            "prefit": False,
            "test_size": 0.3,
            "random_state": RANDOM_STATE,
            "symmetric_intervals": False,
        },
    },
]
285
371
286
- v1 : Union [SplitConformalRegressor ,
287
- CrossConformalRegressor ,
288
- JackknifeAfterBootstrapRegressor ]
372
+
373
@pytest.mark.parametrize("params_quantile", params_test_cases_quantile)
def test_intervals_and_predictions_exact_equality_quantile(params_quantile):
    """Run the v0/v1 comparison for one quantile parameter set.

    ``params_quantile`` is a dict holding the v0 keyword arguments under
    ``"v0"`` and the v1 keyword arguments under ``"v1"``. The split size and
    the prefit flag forwarded to the comparison helper are read from the v1
    parameters.
    """
    v0_params = params_quantile["v0"]
    v1_params = params_quantile["v1"]

    model_v0, model_v1 = select_models_by_strategy("quantile")
    compare_model_predictions_and_intervals(
        model_v0=model_v0,
        model_v1=model_v1,
        X=X,
        y=y,
        v0_params=v0_params,
        v1_params=v1_params,
        # None when the v1 parameters do not define a test size.
        test_size=v1_params.get("test_size"),
        # False when the v1 parameters do not define a prefit flag.
        prefit=v1_params.get("prefit", False),
        random_state=RANDOM_STATE,
    )
391
+
392
+
393
def select_models_by_strategy(
    strategy_key: str
) -> Tuple[
    Type[Union[MapieRegressorV0, MapieQuantileRegressorV0]],
    Type[Union[
        SplitConformalRegressor,
        CrossConformalRegressor,
        JackknifeAfterBootstrapRegressor,
        ConformalizedQuantileRegressor
    ]]
]:
    """Return the (v0, v1) model classes matching a strategy name.

    The classes are returned uninstantiated; building the models from the
    test parameters is left to the caller.

    Parameters
    ----------
    strategy_key : str
        One of "split", "prefit", "cross", "jackknife" or "quantile".

    Returns
    -------
    tuple
        ``(model_v0, model_v1)``: the v0 class and the v1 class to compare.

    Raises
    ------
    ValueError
        If ``strategy_key`` is not one of the names listed above.
    """
    # "split" and "prefit" share the same pair of classes.
    if strategy_key in ("split", "prefit"):
        return MapieRegressorV0, SplitConformalRegressor

    if strategy_key == "cross":
        return MapieRegressorV0, CrossConformalRegressor

    if strategy_key == "jackknife":
        return MapieRegressorV0, JackknifeAfterBootstrapRegressor

    # Only the quantile strategy uses the dedicated v0 quantile regressor.
    if strategy_key == "quantile":
        return MapieQuantileRegressorV0, ConformalizedQuantileRegressor

    raise ValueError(f"Unknown strategy key: {strategy_key}")
312
433
313
434
314
435
def compare_model_predictions_and_intervals (
315
- v0 : MapieRegressorV0 ,
316
- v1 : Union [SplitConformalRegressor ,
317
- CrossConformalRegressor ,
318
- JackknifeAfterBootstrapRegressor ],
436
+ model_v0 : Type [MapieRegressorV0 ],
437
+ model_v1 : Type [Union [
438
+ SplitConformalRegressor ,
439
+ CrossConformalRegressor ,
440
+ JackknifeAfterBootstrapRegressor ,
441
+ ConformalizedQuantileRegressor
442
+ ]],
319
443
X : ArrayLike ,
320
444
y : ArrayLike ,
321
445
v0_params : Dict = {},
@@ -332,16 +456,28 @@ def compare_model_predictions_and_intervals(
332
456
else :
333
457
X_train , X_conf , y_train , y_conf = X , X , y , y
334
458
459
+ if prefit :
460
+ estimator = v0_params ["estimator" ]
461
+ if isinstance (estimator , list ):
462
+ for single_estimator in estimator :
463
+ single_estimator .fit (X_train , y_train )
464
+ else :
465
+ estimator .fit (X_train , y_train )
466
+
467
+ v0_params ["estimator" ] = estimator
468
+ v1_params ["estimator" ] = estimator
469
+
470
+ v0_init_params = filter_params (model_v0 .__init__ , v0_params )
471
+ v1_init_params = filter_params (model_v1 .__init__ , v1_params )
472
+
473
+ v0 = model_v0 (** v0_init_params )
474
+ v1 = model_v1 (** v1_init_params )
475
+
335
476
v0_fit_params = filter_params (v0 .fit , v0_params )
336
477
v1_fit_params = filter_params (v1 .fit , v1_params )
337
478
v1_conformalize_params = filter_params (v1 .conformalize , v1_params )
338
479
339
480
if prefit :
340
- estimator = v0 .estimator
341
- estimator .fit (X_train , y_train )
342
- v0 .estimator = estimator
343
- v1 ._mapie_regressor .estimator = estimator
344
-
345
481
v0 .fit (X_conf , y_conf , ** v0_fit_params )
346
482
else :
347
483
v0 .fit (X , y , ** v0_fit_params )
0 commit comments