diff --git a/modAL/models/base.py b/modAL/models/base.py index 3d9dadc..7675f34 100644 --- a/modAL/models/base.py +++ b/modAL/models/base.py @@ -6,7 +6,7 @@ import abc import sys import warnings -from typing import Union, Callable, Optional, Tuple, List, Iterator, Any +from typing import Union, Callable, Optional, Tuple, List, Iterator, Any, Protocol, TypeVar import numpy as np from sklearn.base import BaseEstimator @@ -23,6 +23,48 @@ else: ABC = abc.ABCMeta('ABC', (), {}) +GenericEstimator = TypeVar('GenericEstimator') + + +class FitFunction(Protocol): + def __call__(self, estimator: GenericEstimator, X, y, **kwargs) -> GenericEstimator: + raise NotImplementedError + + +class PredictFunction(Protocol): + def __call__(self, estimator: GenericEstimator, X, **kwargs) -> np.ndarray: + raise NotImplementedError + + +class PredictProbaFunction(Protocol): + def __call__(self, estimator: GenericEstimator, X, **kwargs) -> np.ndarray: + raise NotImplementedError + + +class ScoreFunction(Protocol): + def __call__(self, estimator: GenericEstimator, X, y, **kwargs) -> Any: + raise NotImplementedError + + +class SKLearnFitFunction(FitFunction): + def __call__(self, estimator: BaseEstimator, X, y, **kwargs) -> BaseEstimator: + return estimator.fit(X=X, y=y, **kwargs) + + +class SKLearnPredictFunction(PredictFunction): + def __call__(self, estimator: BaseEstimator, X, **kwargs) -> np.ndarray: + return estimator.predict(X=X, **kwargs) + + +class SKLearnPredictProbaFunction(PredictProbaFunction): + def __call__(self, estimator: BaseEstimator, X, **kwargs) -> np.ndarray: + return estimator.predict_proba(X=X, **kwargs) + + +class SKLearnScoreFunction(ScoreFunction): + def __call__(self, estimator: BaseEstimator, X, y, **kwargs) -> Any: + return estimator.score(X=X, y=y, **kwargs) + class BaseLearner(ABC, BaseEstimator): """ @@ -49,6 +91,7 @@ class BaseLearner(ABC, BaseEstimator): which the model has been trained on. y_training: The labels corresponding to X_training. 
""" + def __init__(self, estimator: BaseEstimator, query_strategy: Callable, @@ -57,6 +100,10 @@ def __init__(self, bootstrap_init: bool = False, on_transformed: bool = False, force_all_finite: bool = True, + fit_func: FitFunction = SKLearnFitFunction(), + predict_func: PredictFunction = SKLearnPredictFunction(), + predict_proba_func: PredictProbaFunction = SKLearnPredictProbaFunction(), + score_func: ScoreFunction = SKLearnScoreFunction(), **fit_kwargs ) -> None: assert callable(query_strategy), 'query_strategy must be callable' @@ -73,6 +120,11 @@ def __init__(self, assert isinstance(force_all_finite, bool), 'force_all_finite must be a bool' self.force_all_finite = force_all_finite + self.fit_func = fit_func + self.predict_func = predict_func + self.predict_proba_func = predict_proba_func + self.score_func = score_func + def _add_training_data(self, X: modALinput, y: modALinput) -> None: """ Adds the new data and label to the known data, but does not retrain the model. @@ -152,11 +204,14 @@ def _fit_to_known(self, bootstrap: bool = False, **fit_kwargs) -> 'BaseLearner': self """ if not bootstrap: - self.estimator.fit(self.X_training, self.y_training, **fit_kwargs) + self.fit_func(estimator=self.estimator, X=self.X_training, y=self.y_training, **fit_kwargs) else: n_instances = self.X_training.shape[0] bootstrap_idx = np.random.choice(range(n_instances), n_instances, replace=True) - self.estimator.fit(self.X_training[bootstrap_idx], self.y_training[bootstrap_idx], **fit_kwargs) + self.fit_func(estimator=self.estimator, + X=self.X_training[bootstrap_idx], + y=self.y_training[bootstrap_idx], + **fit_kwargs) return self @@ -177,10 +232,12 @@ def _fit_on_new(self, X: modALinput, y: modALinput, bootstrap: bool = False, **f force_all_finite=self.force_all_finite) if not bootstrap: - self.estimator.fit(X, y, **fit_kwargs) + self.fit_func(estimator=self.estimator, X=X, y=y, **fit_kwargs) else: bootstrap_idx = np.random.choice(range(X.shape[0]), X.shape[0], replace=True) - 
self.estimator.fit(X[bootstrap_idx], y[bootstrap_idx]) + self.fit_func(estimator=self.estimator, + X=X[bootstrap_idx], + y=y[bootstrap_idx]) return self @@ -219,7 +276,7 @@ def predict(self, X: modALinput, **predict_kwargs) -> Any: Returns: Estimator predictions for X. """ - return self.estimator.predict(X, **predict_kwargs) + return self.predict_func(estimator=self.estimator, X=X, **predict_kwargs) def predict_proba(self, X: modALinput, **predict_proba_kwargs) -> Any: """ @@ -232,7 +289,7 @@ def predict_proba(self, X: modALinput, **predict_proba_kwargs) -> Any: Returns: Class probabilities for X. """ - return self.estimator.predict_proba(X, **predict_proba_kwargs) + return self.predict_proba_func(estimator=self.estimator, X=X, **predict_proba_kwargs) def query(self, X_pool, *query_args, **query_kwargs) -> Union[Tuple, modALinput]: """ @@ -272,7 +329,7 @@ def score(self, X: modALinput, y: modALinput, **score_kwargs) -> Any: Returns: The score of the predictor. """ - return self.estimator.score(X, y, **score_kwargs) + return self.score_func(estimator=self.estimator, X=X, y=y, **score_kwargs) @abc.abstractmethod def teach(self, *args, **kwargs) -> None: @@ -289,6 +346,7 @@ class BaseCommittee(ABC, BaseEstimator): on_transformed: Whether to transform samples with the pipeline defined by each learner's estimator when applying the query strategy. """ + def __init__(self, learner_list: List[BaseLearner], query_strategy: Callable, on_transformed: bool = False) -> None: assert type(learner_list) == list, 'learners must be supplied in a list' @@ -413,7 +471,8 @@ def rebag(self, **fit_kwargs) -> None: """ self._fit_to_known(bootstrap=True, **fit_kwargs) - def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None: + def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, + **fit_kwargs) -> None: """ Adds X and y to the known training data for each learner and retrains learners with the augmented dataset. 
@@ -436,4 +495,4 @@ def predict(self, X: modALinput) -> Any: @abc.abstractmethod def vote(self, X: modALinput) -> Any: # TODO: clarify typing - pass \ No newline at end of file + pass diff --git a/modAL/models/learners.py b/modAL/models/learners.py index 9af43a2..5641937 100644 --- a/modAL/models/learners.py +++ b/modAL/models/learners.py @@ -5,7 +5,18 @@ from sklearn.base import BaseEstimator from sklearn.metrics import accuracy_score -from modAL.models.base import BaseLearner, BaseCommittee +from modAL.models.base import ( + BaseLearner, + BaseCommittee, + FitFunction, + PredictFunction, + PredictProbaFunction, + ScoreFunction, + SKLearnFitFunction, + SKLearnPredictFunction, + SKLearnPredictProbaFunction, + SKLearnScoreFunction +) from modAL.utils.validation import check_class_labels, check_class_proba from modAL.utils.data import modALinput, retrieve_rows from modAL.uncertainty import uncertainty_sampling @@ -69,6 +80,11 @@ class ActiveLearner(BaseLearner): ... ) """ + fit_func: FitFunction = SKLearnFitFunction() + predict_func: PredictFunction = SKLearnPredictFunction() + predict_proba_func: PredictProbaFunction = SKLearnPredictProbaFunction() + score_func: ScoreFunction = SKLearnScoreFunction() + def __init__(self, estimator: BaseEstimator, query_strategy: Callable = uncertainty_sampling, @@ -76,10 +92,17 @@ def __init__(self, y_training: Optional[modALinput] = None, bootstrap_init: bool = False, on_transformed: bool = False, + force_all_finite: bool = True, + fit_func: FitFunction = SKLearnFitFunction(), + predict_func: PredictFunction = SKLearnPredictFunction(), + predict_proba_func: PredictProbaFunction = SKLearnPredictProbaFunction(), + score_func: ScoreFunction = SKLearnScoreFunction(), **fit_kwargs ) -> None: super().__init__(estimator, query_strategy, - X_training, y_training, bootstrap_init, on_transformed, **fit_kwargs) + X_training, y_training, bootstrap_init, on_transformed, force_all_finite, + fit_func, predict_func, predict_proba_func, score_func, 
+ **fit_kwargs) def teach(self, X: modALinput, y: modALinput, bootstrap: bool = False, only_new: bool = False, **fit_kwargs) -> None: """ @@ -174,6 +197,12 @@ class BayesianOptimizer(BaseLearner): ... query_idx, query_inst = optimizer.query(X) ... optimizer.teach(X[query_idx].reshape(1, -1), y[query_idx].reshape(1, -1)) """ + + fit_func: FitFunction = SKLearnFitFunction() + predict_func: PredictFunction = SKLearnPredictFunction() + predict_proba_func: PredictProbaFunction = SKLearnPredictProbaFunction() + score_func: ScoreFunction = SKLearnScoreFunction() + def __init__(self, estimator: BaseEstimator, query_strategy: Callable = max_EI, @@ -181,6 +210,10 @@ def __init__(self, y_training: Optional[modALinput] = None, bootstrap_init: bool = False, on_transformed: bool = False, + fit_func: FitFunction = SKLearnFitFunction(), + predict_func: PredictFunction = SKLearnPredictFunction(), + predict_proba_func: PredictProbaFunction = SKLearnPredictProbaFunction(), + score_func: ScoreFunction = SKLearnScoreFunction(), **fit_kwargs) -> None: super(BayesianOptimizer, self).__init__(estimator, query_strategy, X_training, y_training, bootstrap_init, on_transformed, **fit_kwargs) diff --git a/setup.py b/setup.py index 8daf94e..c3f2b60 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='modAL', - version='0.4.0', + version='0.4.1', author='Tivadar Danka', author_email='85a5187a@opayq.com', description='A modular active learning framework for Python3', diff --git a/tests/example_tests/active_regression.py b/tests/example_tests/active_regression.py index 72d43f3..54eca5d 100644 --- a/tests/example_tests/active_regression.py +++ b/tests/example_tests/active_regression.py @@ -27,7 +27,7 @@ regressor = ActiveLearner( estimator=GaussianProcessRegressor(kernel=kernel), query_strategy=max_std_sampling, - X_training=X_initial.reshape(-1, 1), y_training=y_initial.reshape(-1, 1) + X_training=X_initial.reshape(-1, 1), y_training=y_initial.reshape(-1, 1), ) # active learning