@@ -1,7 +1,7 @@
 import numpy as np
 from numba import njit
 import sobol_seq
-import kernel_herding
+import kernel_methods
 from itertools import count
 from math import cos, gamma, pi, sin, sqrt
 from typing import Callable, Iterator, List
@@ -84,14 +84,18 @@ def owen_complement(X_background, X_foreground, predict_function, n_samples):
 
 
 @njit
-def _accumulate_samples_castro(phi, predictions, j):
+def _accumulate_samples_castro(phi, predictions, j, weights=None):
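+    # no weights supplied: fall back to uniform weights, i.e. the plain Monte Carlo average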
+    if weights is None:
+        weights = np.full(predictions.shape[1], 1 / predictions.shape[1])
     for foreground_idx in range(predictions.shape[0]):
         for sample_idx in range(predictions.shape[1]):
             phi[foreground_idx, j[sample_idx]] += predictions[foreground_idx][
-                sample_idx] / predictions.shape[1]
+                sample_idx] * weights[sample_idx]
 
 
-def estimate_shap_given_permutations(X_background, X_foreground, predict_function, p):
+def estimate_shap_given_permutations(X_background, X_foreground, predict_function, p, weights=None):
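+    # weights: optional per-permutation quadrature weights; None keeps the uniform average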
     n_features = X_background.shape[1]
     phi = np.zeros((X_foreground.shape[0], n_features))
     n_permutations = p.shape[0]
@@ -105,7 +109,7 @@ def estimate_shap_given_permutations(X_background, X_foreground, predict_function, p):
         predictions = (pred_on - pred_off).reshape(
             (X_foreground.shape[0], mask.shape[0], X_background.shape[0]))
         predictions = np.mean(predictions, axis=2)
-        _accumulate_samples_castro(phi, predictions, j)
+        _accumulate_samples_castro(phi, predictions, j, weights)
         pred_off = pred_on
 
     return phi
@@ -123,5 +127,29 @@ def monte_carlo(X_background, X_foreground, predict_function, n_samples):
     return estimate_shap_given_permutations(X_background, X_foreground, predict_function, p)
 
 
+def monte_carlo_weighted(X_background, X_foreground, predict_function, n_samples):
+    n_features = X_background.shape[1]
+    assert n_samples % (n_features + 1) == 0
+    # castro is allowed to take 2 * more samples than owen as it reuses predictions
+    samples_per_feature = 2 * (n_samples // (n_features + 1))
+    p = np.zeros((samples_per_feature, n_features), dtype=np.int64)
+    for i in range(samples_per_feature):
+        p[i] = np.random.permutation(n_features)
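+    # reweight the i.i.d. permutations with Bayesian quadrature weights under the kt kernel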
+    weights = kernel_methods.compute_bayesian_weights(p, kernel_methods.kt_kernel)
+    return estimate_shap_given_permutations(X_background, X_foreground, predict_function, p,
+                                            weights)
+
+
+def sbq(X_background, X_foreground, predict_function, n_samples):
+    n_features = X_background.shape[1]
+    assert n_samples % (n_features + 1) == 0
+    samples_per_feature = 2 * (n_samples // (n_features + 1))
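+    # sequential Bayesian quadrature selects the permutations and their weights jointly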
+    p, w = kernel_methods.sequential_bayesian_quadrature(samples_per_feature, n_features)
+    return estimate_shap_given_permutations(X_background, X_foreground, predict_function, p,
+                                            w)
+
+
 def monte_carlo_antithetic(X_background, X_foreground, predict_function, n_samples):
     n_features = X_background.shape[1]
@@ -157,6 +185,7 @@ def sobol_sphere_permutations(n_samples, n_features):
 
     return np.argsort(sobol, axis=1)
 
+
 # sample with l ones and i off
 def draw_castro_stratified_samples(n_samples, n_features, i, l):
     mask = np.zeros((n_samples, n_features - 1), dtype=bool)
@@ -272,7 +301,7 @@ def kt_herding(X_background, X_foreground, predict_function, n_samples):
     assert n_samples % (n_features + 1) == 0
     # castro is allowed to take 2 * more samples than owen as it reuses predictions
     samples_per_feature = 2 * (n_samples // (n_features + 1))
-    p = kernel_herding.kt_herding_permutations(samples_per_feature, n_features)
+    p = kernel_methods.kt_herding_permutations(samples_per_feature, n_features)
     return estimate_shap_given_permutations(X_background, X_foreground, predict_function, p)
 
 
@@ -330,6 +359,17 @@ def orthogonal(X_background, X_foreground, predict_function, n_samples):
     return estimate_shap_given_permutations(X_background, X_foreground, predict_function, p)
 
 
+def orthogonal_weighted(X_background, X_foreground, predict_function, n_samples):
+    n_features = X_background.shape[1]
+    assert n_samples % (2 * (n_features + 1)) == 0
+    # castro is allowed to take 2 * more samples than owen as it reuses predictions
+    samples_per_feature = 2 * (n_samples // (n_features + 1))
+    p = _orthogonal_permutations(samples_per_feature, n_features)
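+    # compute Bayesian quadrature weights for the orthogonal permutations under the kt kernel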
+    w = kernel_methods.compute_bayesian_weights(p, kernel_methods.kt_kernel)
+    return estimate_shap_given_permutations(X_background, X_foreground, predict_function, p, w)
+
+
 def _int_sin_m(x: float, m: int) -> float:
     """Computes the integral of sin^m(t) dt from 0 to x recursively"""
     if m == 0:
@@ -438,6 +478,10 @@ def fibonacci(X_background, X_foreground, predict_function, n_samples):
 def min_sample_size(alg, n_features):
     if alg == monte_carlo:
         return n_features + 1
+    elif alg == monte_carlo_weighted:
+        return n_features + 1
+    elif alg == sbq:
+        return n_features + 1
     elif alg == qmc_sobol:
         return n_features + 1
     elif alg == fibonacci:
@@ -450,6 +494,8 @@ def min_sample_size(alg, n_features):
         return 2 * (n_features + 1)
     elif alg == orthogonal:
         return 2 * (n_features + 1)
+    elif alg == orthogonal_weighted:
+        return 2 * (n_features + 1)
     elif alg == owen or alg == owen_complement:
         return n_features * 4
     elif alg == castro_stratified: