-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Closed
Description
Hi team, thanks for the great work on tpot!
I found a few bugs that needed fixing for my use case and wanted to share the fixes that worked for me, in case they are useful:
- Subject: [PATCH] vendor/tpot: bugfix for feature mutation remove
.../tpot/search_spaces/nodes/genetic_feature_selection.py
diff --git a/vendor/tpot/tpot/search_spaces/nodes/genetic_feature_selection.py b/vendor/tpot/tpot/search_spaces/nodes/genetic_feature_selection.py
--- a/vendor/tpot/tpot/search_spaces/nodes/genetic_feature_selection.py
+++ b/vendor/tpot/tpot/search_spaces/nodes/genetic_feature_selection.py
@@ -183,8 +183,9 @@ def _mutate_remove(self, rng=None):
p = to_remove / num_pos
p = min(p, .5)
- remove_mask = rng.choice([True, False], size=self.mask.shape, p=[p,1-p])
- self.mask = np.logical_and(self.mask, remove_mask)
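+ # bugfix: the original mask kept each entry with probability p (removing with 1-p); draw a keep mask that is False (remove) with probability p instead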
+ keep_mask = rng.choice([False, True], size=self.mask.shape, p=[p,1-p])
+ self.mask = np.logical_and(self.mask, keep_mask)
if sum(self.mask) == 0:
- Subject: [PATCH] vendor/tpot: bugfix threshold score
.../tpot/evolvers/base_evolver.py
diff --git a/vendor/tpot/tpot/evolvers/base_evolver.py b/vendor/tpot/tpot/evolvers/base_evolver.py
--- a/vendor/tpot/tpot/evolvers/base_evolver.py
+++ b/vendor/tpot/tpot/evolvers/base_evolver.py
@@ -891,8 +891,12 @@ def evaluate_population_selection_early_stop(self,survival_counts, thresholds=No
invalids = []
for i in range(len(offspring_scores)):
- if all([s*w>t*w for s,t,w in zip(offspring_scores[i],threshold,objective_function_signs) ]):
+ # fix: 'not' is needed because the threshold logic in the original TPOT code was inverted; '>=' is used so this also works when scores are identical across all individuals (e.g. on the first of multiple stateful runs)
+ if not all([s*w>=t*w for s,t,w in zip(offspring_scores[i],threshold,objective_function_signs) ]):
invalids.append(i)
if len(invalids) > 0:
- Subject: [PATCH] vendor/tpot: small fixes
.../tpot/tpot_estimator/estimator.py
diff --git a/vendor/tpot/tpot/tpot_estimator/estimator.py b/vendor/tpot/tpot/tpot_estimator/estimator.py
--- a/vendor/tpot/tpot/tpot_estimator/estimator.py
+++ b/vendor/tpot/tpot/tpot_estimator/estimator.py
@@ -658,7 +658,7 @@ def objective_function(pipeline_individual,
if self.threshold_evaluation_pruning is not None or self.selection_evaluation_pruning is not None:
- evaluation_early_stop_steps = self.cv
+ evaluation_early_stop_steps = n_folds # fix to work with cv object instead of int
else:
evaluation_early_stop_steps = None
.../tpot/evolvers/base_evolver.py
diff --git a/vendor/tpot/tpot/evolvers/base_evolver.py b/vendor/tpot/tpot/evolvers/base_evolver.py
--- a/vendor/tpot/tpot/evolvers/base_evolver.py
+++ b/vendor/tpot/tpot/evolvers/base_evolver.py
@@ -908,7 +908,7 @@ def evaluate_population_selection_early_stop(self,survival_counts, thresholds=No
# Remove based on selection
if survival_counts is not None:
if step < self.evaluation_early_stop_steps - 1 and survival_counts[step]>1: #don't do selection for the last loop since they are completed
- k = survival_counts[step] + len(invalids) #TODO can remove the min if the selections method can ignore k>population size
+ k = survival_counts[step] # removed '+ len(invalids)': unclear which 'invalids' object from above it was meant to refer to (to be confirmed)
if len(cur_individuals)> 1 and k > self.n_jobs and k < len(cur_individuals):
weighted_scores = np.array([s * self.objective_function_weights for s in offspring_scores ])
perib
Metadata
Metadata
Assignees
Labels
No labels