Merge pull request numba#9094 from stuartarchibald/fix/8172

Add support for a 'max' level to NUMBA_OPT environment variable.
Deezy33-creator · Aug 2, 2023 · 8fa8ddd · 8fa8ddd
2 parents d7a03e0 + c91b102
commit 8fa8ddd
Show file tree

Hide file tree

Showing 5 changed files with 182 additions and 10 deletions.
diff --git a/docs/source/reference/envvars.rst b/docs/source/reference/envvars.rst
@@ -299,7 +299,16 @@ Compilation options
 
 .. envvar:: NUMBA_OPT
 
-   The optimization level; this option is passed straight to LLVM.
+   The optimization level; typically this option is passed straight to LLVM. It
+   may take one of the values ``0``, ``1``, ``2`` or ``3`` which correspond
+   approximately to the ``-O{value}`` flag found in many command line
+   compilation tools. The value ``max`` is also supported; this is Numba
+   specific and has the effect of running with the optimization level set at
+   ``3`` both before and after a pass in which reference count operation
+   pruning takes place. In some cases this may increase performance, in other
+   cases it may impede performance; the same can be said for compilation time.
+   This option is present to give users the opportunity to choose a value
+   suitable for their application.
 
    *Default value:* 3
 

diff --git a/docs/upcoming_changes/9094.new_feature.rst b/docs/upcoming_changes/9094.new_feature.rst
@@ -0,0 +1,13 @@
+
+Add support for value ``max`` to ``NUMBA_OPT``.
+===============================================
+
+The optimisation level that Numba applies when compiling can be set through the
+environment variable ``NUMBA_OPT``. This has historically been a value between
+0 and 3 (inclusive). Support for the value ``max`` has now been added; this is a
+Numba-specific optimisation level which indicates that the user would like Numba
+to try running the most optimisation possible, potentially trading a longer
+compilation time for better run-time performance. In practice, use of the ``max``
+level of optimisation may or may not benefit the run-time or compile-time
+performance of user code, but it has been added to present an easy to access
+option for users to try if they so wish.
diff --git a/numba/core/codegen.py b/numba/core/codegen.py
@@ -1187,10 +1187,30 @@ def _init(self, llvm_module):
         self._engine = JitEngine(engine)
         self._target_data = engine.target_data
         self._data_layout = str(self._target_data)
-        self._mpm_cheap = self._module_pass_manager(loop_vectorize=False,
+
+        if config.OPT.is_opt_max:
+            # If the OPT level is set to 'max' then the user is requesting that
+            # compilation time is traded for potential performance gain. This
+            # currently manifests as running the "cheap" pass at -O3
+            # optimisation level with loop-vectorization enabled. There's no
+            # guarantee that this will increase runtime performance; it may
+            # even degrade it. This is here to give the user an easily
+            # accessible option to try.
+            loopvect = True
+            opt_level = 3
+        else:
+            # The default behaviour is to do an opt=0 pass to try and inline as
+            # much as possible with the cheapest cost of doing so. This is so
+            # that the ref-op pruner pass that runs after the cheap pass will
+            # have the largest possible scope for working on pruning references.
+            loopvect = False
+            opt_level = 0
+
+        self._mpm_cheap = self._module_pass_manager(loop_vectorize=loopvect,
                                                     slp_vectorize=False,
-                                                    opt=0,
+                                                    opt=opt_level,
                                                     cost="cheap")
+
         self._mpm_full = self._module_pass_manager()
 
         self._engine.set_object_cache(self._library_class._object_compiled_hook,

diff --git a/numba/core/config.py b/numba/core/config.py
@@ -80,6 +80,49 @@ def _validate_captured_errors_style(style_str):
         return rendered_style
 
 
+class _OptLevel(int):
+    """This class holds the "optimisation level" set in `NUMBA_OPT`. As this env
+    var can be an int or a string, but is almost always interpreted as an int,
+    this class subclasses int so as to get the common behaviour but stores the
+    actual value as a `_raw_value` member. The value "max" is a special case
+    and the property `is_opt_max` can be queried to find if the optimisation
+    level (supplied value at construction time) is "max"."""
+
+    def __new__(cls, *args, **kwargs):
+        assert len(args) == 1
+        (value,) = args
+        _int_value = 3 if value == 'max' else int(value)
+        # the int ctor is always called with an appropriate integer value
+        new = super().__new__(cls, _int_value, **kwargs)
+        # raw value is max or int
+        new._raw_value = value if value == 'max' else _int_value
+        return new
+
+    @property
+    def is_opt_max(self):
+        """Returns True if the optimisation level is "max", False
+        otherwise."""
+        return self._raw_value == "max"
+
+    def __repr__(self):
+        if isinstance(self._raw_value, str):
+            arg = f"'{self._raw_value}'"
+        else:
+            arg = self._raw_value
+        return f"_OptLevel({arg})"
+
+
+def _process_opt_level(opt_level):
+
+    if opt_level not in ('0', '1', '2', '3', 'max'):
+        msg = ("Environment variable `NUMBA_OPT` is set to an unsupported "
+               f"value '{opt_level}', supported values are 0, 1, 2, 3, and "
+               "'max'")
+        raise ValueError(msg)
+    else:
+        return _OptLevel(opt_level)
+
+
 class _EnvReloader(object):
 
     def __init__(self):
@@ -150,9 +193,11 @@ def _readenv(name, ctor, default):
                 return default() if callable(default) else default
             try:
                 return ctor(value)
-            except Exception:
-                warnings.warn("environ %s defined but failed to parse '%s'" %
-                              (name, value), RuntimeWarning)
+            except Exception as e:
+                warnings.warn(f"Environment variable '{name}' is defined but "
+                              f"its associated value '{value}' could not be "
+                              f"parsed.\nThe parse failed with exception: {e}.",
+                              RuntimeWarning)
                 return default
 
         def optional_str(x):
@@ -259,7 +304,7 @@ def optional_str(x):
                                 ("" if str(CPU_NAME).lower() == 'generic'
                                  else None))
         # Optimization level
-        OPT = _readenv("NUMBA_OPT", int, 3)
+        OPT = _readenv("NUMBA_OPT", _process_opt_level, _OptLevel(3))
 
         # Force dump of Python bytecode
         DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND)

diff --git a/numba/tests/test_config.py b/numba/tests/test_config.py
@@ -2,6 +2,7 @@
 import tempfile
 from textwrap import dedent
 import unittest
+from unittest import mock
 from numba.tests.support import (TestCase, temp_directory, override_env_config,
                                  run_in_subprocess)
 from numba.core import config
@@ -113,11 +114,95 @@ def test_illegal_error_style_handling(self):
         code = ("from numba import njit\n@njit\ndef foo():\n\t"
                 f"print('{source_compiled}')\nfoo()")
         out, err = run_in_subprocess(dedent(code), env=new_env)
-        expected = ("environ NUMBA_CAPTURED_ERRORS defined but failed to parse "
-                    "\'not_a_known_style\'")
-        self.assertIn(expected, err.decode('utf-8'))
+        expected = ("Environment variable \'NUMBA_CAPTURED_ERRORS\' is defined "
+                    "but its associated value \'not_a_known_style\' could not "
+                    "be parsed.")
+        err_msg = err.decode('utf-8')
+        self.assertIn(expected, err_msg)
+        ex_expected = ("The parse failed with exception: Invalid style in "
+                       "NUMBA_CAPTURED_ERRORS: not_a_known_style")
+        self.assertIn(ex_expected, err_msg)
         self.assertIn(source_compiled, out.decode('utf-8'))
 
 
+class TestNumbaOptLevel(TestCase):
+    # Tests that the setting of NUMBA_OPT influences the "cheap" module pass.
+    # Spot checks NUMBA_OPT={'max', '3', '0'}
+
+    def check(self, expected, opt_value, raw_value):
+        # local imports for state-safety
+        from numba import config, njit
+
+        # check opt value and its raw_value
+        self.assertEqual(config.OPT, opt_value)
+        self.assertEqual(config.OPT._raw_value, raw_value)
+
+        # Patch the CPUCodegen to capture calls to the
+        # `_module_pass_manager` through a `side_effect` function that asserts
+        # that the kwargs being passed are as expected per the "NUMBA_OPT"
+        # level. The `side_effect` function immediately raises with a known
+        # message to abort further stages of compilation once the check is
+        # complete.
+        from numba.core.codegen import CPUCodegen
+        side_effect_message = "expected side effect"
+
+        def side_effect(*args, **kwargs):
+            self.assertEqual(kwargs, expected)
+            raise RuntimeError(side_effect_message)
+
+        with mock.patch.object(CPUCodegen, '_module_pass_manager',
+                               side_effect=side_effect):
+            with self.assertRaises(RuntimeError) as raises:
+                njit(lambda : ...)()
+
+            self.assertIn(side_effect_message, str(raises.exception))
+
+    @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': 'max'})
+    def test_opt_max(self):
+        # NUMBA_OPT='max' should set opt to 3 and enable loop_vectorize
+        expected = {'loop_vectorize': True,
+                    'slp_vectorize': False,
+                    'opt': 3,
+                    'cost': 'cheap'}
+        self.check(expected, 3, 'max')
+
+    @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': '3'})
+    def test_opt_3(self):
+        # NUMBA_OPT='3' should not impact opt or loop_vectorize
+        expected = {'loop_vectorize': False,
+                    'slp_vectorize': False,
+                    'opt': 0,
+                    'cost': 'cheap'}
+        self.check(expected, 3, 3)
+
+    @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': '0'})
+    def test_opt_0(self):
+        # NUMBA_OPT='0' should not impact opt or loop_vectorize
+        expected = {'loop_vectorize': False,
+                    'slp_vectorize': False,
+                    'opt': 0,
+                    'cost': 'cheap'}
+        self.check(expected, 0, 0)
+
+    @TestCase.run_test_in_subprocess()
+    def test_opt_default(self):
+        # NUMBA_OPT is not set, the default should not impact opt or
+        # loop_vectorize
+        expected = {'loop_vectorize': False,
+                    'slp_vectorize': False,
+                    'opt': 0,
+                    'cost': 'cheap'}
+        self.check(expected, 3, 3)
+
+    @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': 'invalid'})
+    def test_opt_invalid(self):
+        # NUMBA_OPT='invalid' should just proceed as default case
+        expected = {'loop_vectorize': False,
+                    'slp_vectorize': False,
+                    'opt': 0,
+                    'cost': 'cheap'}
+        self.check(expected, 3, 3)
+
+
 if __name__ == '__main__':
     unittest.main()