From d5edb29f717c6bc830b8d832fcca8b3cd2e32e46 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers
Date: Tue, 30 Nov 2021 14:48:50 +0100
Subject: [PATCH] Add a `goal` hyperparameter to Gama base (#133)
This lets GAMA automagically configure the AutoML pipeline based on the
user's intent.
---
docs/source/releases.rst | 10 ++++++++++
gama/__version__.py | 2 +-
gama/gama.py | 28 ++++++++++++++++++++++++----
3 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/docs/source/releases.rst b/docs/source/releases.rst
index c0984b54..f8dd5de9 100644
--- a/docs/source/releases.rst
+++ b/docs/source/releases.rst
@@ -1,6 +1,16 @@
Release Notes
=============
+Version 21.0.1
+--------------
+
+Features:
+ - Add a ``goal`` hyperparameter to all GAMA estimators which lets you specify the goal
+ of your AutoML execution. Currently ``simplicity`` can be specified to create a
+ simple model, and ``performance`` can be used to generate the best possible model.
+ It is still possible to manually set the search and post processing methods.
+
+
Version 21.0.0
--------------
diff --git a/gama/__version__.py b/gama/__version__.py
index 4670b828..596b04cf 100644
--- a/gama/__version__.py
+++ b/gama/__version__.py
@@ -1,2 +1,2 @@
# format: YY.minor.micro
-__version__ = "21.0.0"
+__version__ = "21.0.1"
diff --git a/gama/gama.py b/gama/gama.py
index 82f95fc3..50a32ee2 100644
--- a/gama/gama.py
+++ b/gama/gama.py
@@ -93,10 +93,11 @@ def __init__(
n_jobs: Optional[int] = None,
max_memory_mb: Optional[int] = None,
verbosity: int = logging.WARNING,
- search: BaseSearch = AsyncEA(),
- post_processing: BasePostProcessing = BestFitPostProcessing(),
+ search: Optional[BaseSearch] = None,
+ post_processing: Optional[BasePostProcessing] = None,
output_directory: Optional[str] = None,
store: str = "logs",
+ goal: str = "simplicity",
):
"""
@@ -148,12 +149,14 @@ def __init__(
verbosity: int (default=logging.WARNING)
Sets the level of log messages to be automatically output to terminal.
- search: BaseSearch (default=AsyncEA())
+ search: BaseSearch, optional
Search method to use to find good pipelines. Should be instantiated.
+ Defaults to ``AsyncEA()``; currently not affected by ``goal``.
- post_processing: BasePostProcessing (default=BestFitPostProcessing())
+ post_processing: BasePostProcessing, optional
Post-processing method to create a model after the search phase.
Should be an instantiated subclass of BasePostProcessing.
+ Default depends on ``goal``.
output_directory: str, optional (default=None)
Directory to use to save GAMA output. This includes both intermediate
@@ -166,7 +169,24 @@ def __init__(
- 'models': keep only cache with models and predictions
- 'logs': keep only the logs
- 'all': keep logs and cache with models and predictions
+
+ goal: str (default='simplicity')
+ Determines the steps of the AutoML pipeline when they are not
+ provided explicitly, based on the given goal.
+ One of:
+ - simplicity: Create a simple pipeline with good performance.
+ - performance: Try to get the best performing model.
"""
+ if search is None:
+ search = AsyncEA()
+ if post_processing is None:
+ if goal == 'simplicity':
+ post_processing = BestFitPostProcessing()
+ elif goal == 'performance':
+ post_processing = EnsemblePostProcessing()
+ else:
+ raise ValueError(f"Unknown value for `goal`: '{goal}'")
+
if not output_directory:
output_directory = f"gama_{str(uuid.uuid4())}"
self.output_directory = os.path.abspath(os.path.expanduser(output_directory))