Commit c9f9550

Merge pull request #47 from wwu-mmll/develop

New PHOTONAI Version

2 parents 79d9725 + aa77b16

File tree: 19 files changed (+211, -77 lines)

documentation/docs/algorithms/hpos.md (+5)

@@ -32,6 +32,11 @@ pipe = Hyperpipe("...",
                                    'limit_in_minutes': 20})
 ```
 
+If the `optimizer_params` contain both a time limit and a numerical limit, both are
+respected: optimization aborts as soon as either of the limits is met.
+
+The default limit for Random Search is `n_configurations=10`.
+
 <h3>Scikit-Optimize</h3>
 Scikit-Optimize, or skopt, is a simple and efficient library to
 minimize (very) expensive and noisy black-box functions.
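To make the combined-limit behaviour concrete, here is a minimal sketch; the pipeline name and `project_folder` are illustrative, only `optimizer` and `optimizer_params` matter here:

```python
from photonai.base import Hyperpipe

# Random Search with both a numerical and a time budget: the optimizer
# stops as soon as EITHER limit is reached. Without optimizer_params,
# Random Search defaults to n_configurations=10.
pipe = Hyperpipe('random_search_pipe',
                 optimizer='random_search',
                 optimizer_params={'n_configurations': 50,
                                   'limit_in_minutes': 20},
                 project_folder='./tmp')
```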

examples/advanced/regression_with_constraints.py (+21, -2)

@@ -4,6 +4,7 @@
 from photonai.base import Hyperpipe, PipelineElement, OutputSettings
 from photonai.optimization import MinimumPerformanceConstraint, DummyPerformanceConstraint, BestPerformanceConstraint, IntegerRange
 
+import matplotlib.pyplot as plt
 
 my_pipe = Hyperpipe(name='constrained_forest_pipe',
                     optimizer='grid_search',
@@ -14,8 +15,8 @@
                     use_test_set=True,
                     verbosity=1,
                     project_folder='./tmp',
-                    output_settings=OutputSettings(mongodb_connect_url="mongodb://localhost:27017/photon_results",
-                                                   save_output=True),
+                    # output_settings=OutputSettings(mongodb_connect_url="mongodb://localhost:27017/photon_results",
+                    #                                save_output=True),
                     performance_constraints=[DummyPerformanceConstraint('mean_absolute_error'),
                                              MinimumPerformanceConstraint('pearson_correlation', 0.65, 'any'),
                                              BestPerformanceConstraint('mean_squared_error', 3, 'mean')])
@@ -26,3 +27,21 @@
 
 X, y = load_boston(return_X_y=True)
 my_pipe.fit(X, y)
+
+
+# Plot scatter of predictions
+
+train_df = my_pipe.results_handler.get_mean_train_predictions()
+pred_df = my_pipe.results_handler.get_test_predictions()
+
+max_value = int(max(max(pred_df['y_true']), max(pred_df['y_pred']), max(train_df['y_pred'])))
+
+fig, main_axes = plt.subplots()
+main_axes.plot(range(max_value), range(max_value), color='black')
+test_set = main_axes.scatter(pred_df["y_true"], pred_df["y_pred"], label="Test")
+train_set = main_axes.scatter(train_df["y_true"], train_df["y_pred"], label="Training")
+main_axes.legend(handles=[test_set, train_set], loc='lower right')
+main_axes.set_xlabel("y true")
+main_axes.set_ylabel("y predicted")
+
+plt.show()

examples/basic/imbalanced_data.py (+6, -1)

@@ -28,9 +28,14 @@
 my_pipe += PipelineElement('StandardScaler')
 
 tested_methods = Categorical(['RandomOverSampler', 'SMOTEENN', 'SVMSMOTE',
-                              'BorderlineSMOTE', 'SMOTE', 'ClusterCentroids'])
+                              'BorderlineSMOTE', 'SMOTE'])
+
+# Only SMOTE gets a different input parameter.
+# All other strategies stay with their default settings.
+# Please do not try to optimize over this parameter (do not use config inside the 'hyperparameters').
 my_pipe += PipelineElement('ImbalancedDataTransformer',
                            hyperparameters={'method_name': tested_methods},
+                           config={"SMOTE": {"k_neighbors": 3}},
                            test_disabled=True)
 
 my_pipe += PipelineElement("RandomForestClassifier", n_estimators=200)

photonai/__init__.py (+1, -1)

@@ -13,6 +13,6 @@
 
 """
 
-__version__ = '2.2.0'
+__version__ = '2.2.1'
 
 # __all__ = ( )

photonai/base/hyperpipe.py (+7, -7)

@@ -305,9 +305,9 @@ def __init__(self, name: Optional[str],
             - "grid_search": Optimizer that iteratively tests all possible hyperparameter combinations.
             - "random_grid_search": A variation of the grid search optimization that randomly picks
               hyperparameter combinations from all possible hyperparameter combinations.
-            - "sk_opt": Scikit-Optimize based on theories of Baysian optimization.
+            - "sk_opt": Scikit-Optimize based on theories of Bayesian optimization.
             - "random_search": randomly chooses hyperparameter from grid-free domain.
-            - "smac": SMAC based on theories of Baysian optimization.
+            - "smac": SMAC based on theories of Bayesian optimization.
             - "nevergrad": Nevergrad based on theories of evolutionary learning.
 
             - In case an object is given:
@@ -359,7 +359,7 @@ def __init__(self, name: Optional[str],
 
             test_size:
                 The amount of the data that should be left out if no outer_cv is given and
-                eval_final_perfomance is set to True.
+                eval_final_performance is set to True.
 
             calculate_metrics_per_fold:
                 If True, the metrics are calculated for each inner_fold.
@@ -377,11 +377,11 @@ def __init__(self, name: Optional[str],
                 gives only warn and error, 1 gives adds info and 2 adds debug.
 
             learning_curves:
-                Enables larning curve procedure. Evaluate learning process over
+                Enables learning curve procedure. Evaluate learning process over
                 different sizes of input. Depends on learning_curves_cut.
 
             learning_curves_cut:
-                The tested relativ cuts for data size.
+                The tested relative cuts for data size.
 
             performance_constraints:
                 Objects that indicate whether a configuration should
@@ -439,7 +439,7 @@ def __init__(self, name: Optional[str],
                                  learning_curves_cut=learning_curves_cut)
 
         # ====================== Data ===========================
-        self.data = Hyperpipe.Data()
+        self.data = Hyperpipe.Data(allow_multidim_targets=allow_multidim_targets)
 
         # ====================== Output Folder and Log File Management ===========================
         if output_settings:
@@ -1215,7 +1215,7 @@ def train_and_get_fimps(pipeline, train_idx, test_idx, data_X, data_y, data_kwar
             no_outer_cv_indices = False
             if outer_fold.best_config.best_config_score is None:
                 no_outer_cv_indices = True
-            if outer_fold.best_config.best_config_score.training is None or not outer_fold.best_config.best_config_score.training.indices:
+            elif outer_fold.best_config.best_config_score.training is None or not outer_fold.best_config.best_config_score.training.indices:
                 no_outer_cv_indices = True
 
             if no_outer_cv_indices:

photonai/modelwrapper/imbalanced_data_transformer.py (+28, -6)

@@ -51,7 +51,7 @@ class ImbalancedDataTransformer(BaseEstimator, TransformerMixin):
         'combine': ["SMOTEENN", "SMOTETomek"],
     }
 
-    def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
+    def __init__(self, method_name: str = 'RandomUnderSampler', config: dict = None):
         """
         Instantiates an object that transforms the data into balanced groups according to the given method.
 
@@ -84,20 +84,33 @@ def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
                 - SMOTEENN,
                 - SMOTETomek.
 
-            **kwargs:
-                Any parameters to pass to the imbalance strategy object.
+            config:
+                Each strategy has a set of presets. This parameter is necessary
+                to select the appropriate settings for the selected method.
+                It is important that the key exactly matches the method_name.
+                If no key is found for a method, it is started with the default settings.
+                Please do not use this parameter inside the 'hyperparameters' to optimize it.
 
         """
         if not __found__:
             raise ModuleNotFoundError("Module imblearn not found or not installed as expected. "
                                       "Please install the requirements.txt in PHOTON main folder.")
 
+        self.config = config
+        self._method_name = None
         self.method_name = method_name
         self.needs_y = True
 
+    @property
+    def method_name(self):
+        return self._method_name
+
+    @method_name.setter
+    def method_name(self, value):
+
         imbalance_type = ''
         for group, possible_strategies in ImbalancedDataTransformer.IMBALANCED_DICT.items():
-            if self.method_name in possible_strategies:
+            if value in possible_strategies:
                 imbalance_type = group
 
         if imbalance_type == "oversampling":
@@ -115,8 +128,17 @@ def __init__(self, method_name: str = 'RandomUnderSampler', **kwargs):
             logger.error(msg)
             raise ValueError(msg)
 
-        desired_class = getattr(home, method_name)
-        self.method = desired_class(**kwargs)
+        desired_class = getattr(home, value)
+        self._method_name = value
+        if self.config is not None and value in self.config:
+            if not isinstance(self.config[value], dict):
+                msg = "Please use for the imbalanced config a format like: " \
+                      "config={'SMOTE': {'sampling_strategy': {0: 9, 1: 12}}}."
+                logger.error(msg)
+                raise ValueError(msg)
+            self.method = desired_class(**self.config[value])
+        else:
+            self.method = desired_class()
 
     def fit_transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray):
         """

photonai/modelwrapper/keras_base_estimator.py (+1, -1)

@@ -1,5 +1,5 @@
 import warnings
-import keras
+import tensorflow.keras as keras
 from sklearn.base import BaseEstimator
 
 from photonai.photonlogger.logger import logger

photonai/modelwrapper/keras_base_models.py (+14, -14)

@@ -1,27 +1,27 @@
 import warnings
 import numpy as np
-import keras
+import tensorflow.keras as keras
 from typing import Union
-from keras.utils.all_utils import to_categorical
-from keras.layers import Dropout, Dense
-from keras.layers import BatchNormalization
-from keras.models import Sequential
-from keras.optimizers import Optimizer, adam_v2, rmsprop_v2, adadelta_v2, adagrad_v2, adamax_v2, nadam_v2, gradient_descent_v2
-from keras.activations import softmax, softplus, selu, sigmoid, softsign, hard_sigmoid, elu, relu, tanh, \
+from tensorflow.keras.utils import to_categorical
+from tensorflow.keras.layers import Dropout, Dense
+from tensorflow.keras.layers import BatchNormalization
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import Optimizer, Adam, RMSprop, Adadelta, Adagrad, Adamax, Nadam, SGD
+from tensorflow.keras.activations import softmax, softplus, selu, sigmoid, softsign, hard_sigmoid, elu, relu, tanh, \
     linear, exponential
 from sklearn.base import ClassifierMixin, RegressorMixin
 
 from photonai.photonlogger.logger import logger
 from photonai.modelwrapper.keras_base_estimator import KerasBaseEstimator
 
 __supported_optimizers__ = {
-    'sgd': gradient_descent_v2.SGD,
-    'rmsprop': rmsprop_v2.RMSprop,
-    'adagrad': adagrad_v2.Adagrad,
-    'adadelta': adadelta_v2.Adadelta,
-    'adam': adam_v2.Adam,
-    'adamax': adamax_v2.Adamax,
-    'nadam': nadam_v2.Nadam
+    'sgd': SGD,
+    'rmsprop': RMSprop,
+    'adagrad': Adagrad,
+    'adadelta': Adadelta,
+    'adam': Adam,
+    'adamax': Adamax,
+    'nadam': Nadam
 }
 __supported_activations__ = {
     'softmax': softmax,
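For downstream code, this migration is purely an import swap; a minimal before/after sketch, assuming a TensorFlow 2.x install (which bundles `tensorflow.keras`):

```python
# Before: standalone Keras, which required the keras<=2.6.0 pin
#   from keras.optimizers import adam_v2
#   opt = adam_v2.Adam(learning_rate=1e-3)

# After: Keras bundled with TensorFlow, no separate keras requirement
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=1e-3)
```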

photonai/modelwrapper/keras_dnn_classifier.py (+1, -1)

@@ -1,6 +1,6 @@
 import numpy as np
 from typing import Union
-from keras.optimizers import Optimizer
+from tensorflow.keras.optimizers import Optimizer
 
 from photonai.modelwrapper.keras_base_models import KerasDnnBaseModel, KerasBaseClassifier
 import photonai.modelwrapper.keras_base_models as keras_dnn_base_model

photonai/modelwrapper/keras_dnn_regressor.py (+1, -1)

@@ -1,6 +1,6 @@
 from typing import Union
 import numpy as np
-from keras.optimizers import Optimizer
+from tensorflow.keras.optimizers import Optimizer
 import photonai.modelwrapper.keras_base_models as keras_dnn_base_model
 
 from photonai.modelwrapper.keras_base_models import KerasDnnBaseModel, KerasBaseRegressor

photonai/optimization/random_search/random_search.py (+10, -5)

@@ -13,7 +13,7 @@ class RandomSearchOptimizer(PhotonSlaveOptimizer):
     testing hyperparameter combinations without any grid.
 
     """
-    def __init__(self, limit_in_minutes: Union[float, None] = 60, n_configurations: Union[int, None] = None):
+    def __init__(self, limit_in_minutes: Union[float, None] = None, n_configurations: Union[int, None] = 10):
         """
         Initialize the object.
         One of limit_in_minutes or n_configurations must differ from None.
@@ -74,19 +74,24 @@ def next_config_generator(self) -> Generator:
 
         """
         while True:
-            _ = (yield self._generate_config())
             self.k_configutration += 1
+            new_config = True
             if self.limit_in_minutes:
                 if self.start_time is None:
                     self.start_time = datetime.datetime.now()
                     self.end_time = self.start_time + datetime.timedelta(minutes=self.limit_in_minutes)
 
                 if datetime.datetime.now() >= self.end_time:
-                    return
+                    new_config = False
 
             if self.n_configurations:
-                if self.k_configutration >= self.n_configurations:
-                    return
+                if self.k_configutration >= self.n_configurations + 1:
+                    new_config = False
+
+            if not new_config:
+                return
+
+            _ = (yield self._generate_config())
 
     def _generate_config(self):
         config = {}
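As a usage sketch of the new defaults (class and module path as in the diff; the assertion just restates the defaults shown above):

```python
from photonai.optimization.random_search.random_search import RandomSearchOptimizer

# New defaults: no time budget, exactly 10 sampled configurations.
opt = RandomSearchOptimizer()
assert opt.limit_in_minutes is None and opt.n_configurations == 10

# With both limits set, next_config_generator() yields configurations
# until EITHER the count or the time budget is exhausted, then returns.
opt_both = RandomSearchOptimizer(limit_in_minutes=20, n_configurations=50)
```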

photonai/processing/results_handler.py (+24)

@@ -447,6 +447,30 @@ def collect_fold_lists(score_info_list, fold_nr, predictions_filename=''):
 
         return sorted_df.to_dict('list')
 
+    def get_mean_train_predictions(self, filename=''):
+        """
+        This function returns the MEAN predictions, true targets, and fold index
+        for the TRAINING set of the best configuration of each outer fold.
+        """
+        if self.results is None:
+            raise ValueError("Result tree information is needed but results attribute of object is None.")
+
+        score_info_list = list()
+        fold_nr_list = list()
+        for outer_fold in self.results.outer_folds:
+            score_info_list.append(outer_fold.best_config.best_config_score.training)
+            fold_nr_list.append(outer_fold.fold_nr)
+        infos = self.collect_fold_lists(score_info_list, fold_nr_list, filename)
+        infos = {key: np.array(value) for key, value in infos.items()}
+        num_items = np.unique(infos["indices"])
+        mean_pred = np.zeros(num_items.shape)
+        y_true = np.zeros(num_items.shape)
+        for i in num_items:
+            idx = (infos["indices"] == i)
+            mean_pred[i] = np.mean(infos["y_pred"][idx])
+            y_true[i] = infos["y_true"][idx][0]
+        return {'y_true': y_true, 'y_pred': mean_pred, 'indices': num_items}
+
     def get_test_predictions(self, filename=''):
         """
         This function returns the predictions, true targets, and fold index
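To illustrate what the new helper computes, a standalone numpy sketch of the per-index averaging (toy values, not PHOTONAI API): samples that appear in the training split of several outer folds get their predictions averaged, while each true target is taken once.

```python
import numpy as np

# Toy stand-in for the collected fold lists: sample 0 appears in the
# training split of two outer folds, sample 1 in one.
infos = {'indices': np.array([0, 0, 1]),
         'y_pred': np.array([2.0, 4.0, 5.0]),
         'y_true': np.array([3.0, 3.0, 5.0])}

num_items = np.unique(infos['indices'])
mean_pred = np.array([infos['y_pred'][infos['indices'] == i].mean() for i in num_items])
y_true = np.array([infos['y_true'][infos['indices'] == i][0] for i in num_items])

print({'y_true': y_true, 'y_pred': mean_pred, 'indices': num_items})
# {'y_true': array([3., 5.]), 'y_pred': array([3., 5.]), 'indices': array([0, 1])}
```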

photonai/requirements.txt (-1)

@@ -2,7 +2,6 @@
 numpy
 matplotlib
 scikit-learn
-keras<=2.6.0
 pandas
 plotly
 imbalanced-learn

setup.py (+1, -2)

@@ -6,7 +6,7 @@
 from setuptools import setup, find_packages
 
 
-__version__ = '2.2.0'
+__version__ = '2.2.1'
 
 with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
@@ -41,7 +41,6 @@
         'numpy',
         'matplotlib',
         'scikit-learn',
-        'keras<=2.6.0',
         'pandas',
         'plotly',
         'imbalanced-learn',

test/base_tests/test_hyperpipe.py (+1, -2)

@@ -13,8 +13,7 @@
 from sklearn.model_selection import KFold
 from sklearn.pipeline import Pipeline as SKLPipeline
 from sklearn.preprocessing import StandardScaler
-from sklearn.inspection import permutation_importance
-from keras.metrics import Accuracy
+from tensorflow.keras.metrics import Accuracy
 
 from photonai.base import PipelineElement, Hyperpipe, OutputSettings, Preprocessing, CallbackElement, Branch, Stack, \
     Switch, ParallelBranch
