From d5edb29f717c6bc830b8d832fcca8b3cd2e32e46 Mon Sep 17 00:00:00 2001
From: Pieter Gijsbers <p.gijsbers@tue.nl>
Date: Tue, 30 Nov 2021 14:48:50 +0100
Subject: [PATCH] Add a `goal` hyperparameter to Gama base (#133)

This lets GAMA automagically configure the AutoML pipeline based on the
user's intent.
---
 docs/source/releases.rst | 10 ++++++++++
 gama/__version__.py      |  2 +-
 gama/gama.py             | 28 ++++++++++++++++++++++++----
 3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/docs/source/releases.rst b/docs/source/releases.rst
index c0984b54..f8dd5de9 100644
--- a/docs/source/releases.rst
+++ b/docs/source/releases.rst
@@ -1,6 +1,16 @@
 Release Notes
 =============
 
+Version 21.0.1
+--------------
+
+Features:
+ - Add a ``goal`` hyperparameter to all GAMA estimators which lets you specify the goal
+   of your AutoML execution. Currently ``simplicity`` can be specified to create a
+   simple model, and ``performance`` can be used to generate the best possible model.
+   It is still possible to manually set the search and post-processing methods.
+
+
 Version 21.0.0
 --------------
 
diff --git a/gama/__version__.py b/gama/__version__.py
index 4670b828..596b04cf 100644
--- a/gama/__version__.py
+++ b/gama/__version__.py
@@ -1,2 +1,2 @@
 # format: YY.minor.micro
-__version__ = "21.0.0"
+__version__ = "21.0.1"
diff --git a/gama/gama.py b/gama/gama.py
index 82f95fc3..50a32ee2 100644
--- a/gama/gama.py
+++ b/gama/gama.py
@@ -93,10 +93,11 @@ def __init__(
         n_jobs: Optional[int] = None,
         max_memory_mb: Optional[int] = None,
         verbosity: int = logging.WARNING,
-        search: BaseSearch = AsyncEA(),
-        post_processing: BasePostProcessing = BestFitPostProcessing(),
+        search: Optional[BaseSearch] = None,
+        post_processing: Optional[BasePostProcessing] = None,
         output_directory: Optional[str] = None,
         store: str = "logs",
+        goal: str = "simplicity",
     ):
         """
 
@@ -148,12 +149,14 @@ def __init__(
         verbosity: int (default=logging.WARNING)
             Sets the level of log messages to be automatically output to terminal.
 
-        search: BaseSearch (default=AsyncEA())
+        search: BaseSearch, optional
             Search method to use to find good pipelines. Should be instantiated.
+            Default depends on ``goal``.
 
-        post_processing: BasePostProcessing (default=BestFitPostProcessing())
+        post_processing: BasePostProcessing, optional
             Post-processing method to create a model after the search phase.
             Should be an instantiated subclass of BasePostProcessing.
+            Default depends on ``goal``.
 
         output_directory: str, optional (default=None)
             Directory to use to save GAMA output. This includes both intermediate
@@ -166,7 +169,24 @@ def __init__(
              - 'models': keep only cache with models and predictions
              - 'logs': keep only the logs
              - 'all': keep logs and cache with models and predictions
+
+        goal: str (default='simplicity')
+            Determines the steps of the AutoML pipeline when they are not
+            provided explicitly, based on the given goal.
+            One of:
+                - simplicity: Create a simple pipeline with good performance.
+                - performance: Try to get the best performing model.
         """
+        if search is None:
+            search = AsyncEA()
+        if post_processing is None:
+            if goal == 'simplicity':
+                post_processing = BestFitPostProcessing()
+            elif goal == 'performance':
+                post_processing = EnsemblePostProcessing()
+            else:
+                raise ValueError(f"Unknown value for `goal`: '{goal}'")
+
         if not output_directory:
             output_directory = f"gama_{str(uuid.uuid4())}"
         self.output_directory = os.path.abspath(os.path.expanduser(output_directory))