diff --git a/docs/source/reference/envvars.rst b/docs/source/reference/envvars.rst index 53aecd12848..eb606ff2a82 100644 --- a/docs/source/reference/envvars.rst +++ b/docs/source/reference/envvars.rst @@ -299,7 +299,16 @@ Compilation options .. envvar:: NUMBA_OPT - The optimization level; this option is passed straight to LLVM. + The optimization level; typically this option is passed straight to LLVM. It + may take one of the values ``0``, ``1``, ``2`` or ``3`` which correspond + approximately to the ``-O{value}`` flag found in many command line + compilation tools. The value ``max`` is also supported, this is Numba + specific, it has the effect of running with the optimization level set at + ``3`` both before and after a pass in which reference count operation + pruning takes place. In some cases this may increase performance, in other + cases it may impede performance, the same can be said for compilation time. + This option is present to give users the opportunity to choose a value + suitable for their application. *Default value:* 3 diff --git a/docs/upcoming_changes/9094.new_feature.rst b/docs/upcoming_changes/9094.new_feature.rst new file mode 100644 index 00000000000..0cf7938f229 --- /dev/null +++ b/docs/upcoming_changes/9094.new_feature.rst @@ -0,0 +1,13 @@ + +Add support for value ``max`` to ``NUMBA_OPT``. +=============================================== + +The optimisation level that Numba applies when compiling can be set through the +environment variable ``NUMBA_OPT``. This has historically been a value between +0 and 3 (inclusive). Support for the value ``max`` has now been added, this is a +Numba-specific optimisation level which indicates that the user would like Numba +to try running the most optimisation possible, potentially trading a longer +compilation time for better run-time performance. 
In practice, use of the ``max`` +level of optimisation may or may not benefit the run-time or compile-time +performance of user code, but it has been added to present an easy to access +option for users to try if they so wish. diff --git a/numba/core/codegen.py b/numba/core/codegen.py index ced9e87aac7..28ab618a0b0 100644 --- a/numba/core/codegen.py +++ b/numba/core/codegen.py @@ -1187,10 +1187,30 @@ def _init(self, llvm_module): self._engine = JitEngine(engine) self._target_data = engine.target_data self._data_layout = str(self._target_data) - self._mpm_cheap = self._module_pass_manager(loop_vectorize=False, + + if config.OPT.is_opt_max: + # If the OPT level is set to 'max' then the user is requesting that + # compilation time is traded for potential performance gain. This + # currently manifests as running the "cheap" pass at -O3 + # optimisation level with loop-vectorization enabled. There's no + # guarantee that this will increase runtime performance, it may + # detriment it, this is here to give the user an easily accessible + # option to try. + loopvect = True + opt_level = 3 + else: + # The default behaviour is to do an opt=0 pass to try and inline as + # much as possible with the cheapest cost of doing so. This is so + # that the ref-op pruner pass that runs after the cheap pass will + # have the largest possible scope for working on pruning references. 
+ loopvect = False + opt_level = 0 + + self._mpm_cheap = self._module_pass_manager(loop_vectorize=loopvect, slp_vectorize=False, - opt=0, + opt=opt_level, cost="cheap") + self._mpm_full = self._module_pass_manager() self._engine.set_object_cache(self._library_class._object_compiled_hook, diff --git a/numba/core/config.py b/numba/core/config.py index 6c25bc21045..c2edf63984d 100644 --- a/numba/core/config.py +++ b/numba/core/config.py @@ -80,6 +80,49 @@ def _validate_captured_errors_style(style_str): return rendered_style +class _OptLevel(int): + """This class holds the "optimisation level" set in `NUMBA_OPT`. As this env + var can be an int or a string, but is almost always interpreted as an int, + this class subclasses int so as to get the common behaviour but stores the + actual value as a `_raw_value` member. The value "max" is a special case + and the property `is_opt_max` can be queried to find if the optimisation + level (supplied value at construction time) is "max".""" + + def __new__(cls, *args, **kwargs): + assert len(args) == 1 + (value,) = args + _int_value = 3 if value == 'max' else int(value) + # the int ctor is always called with an appropriate integer value + new = super().__new__(cls, _int_value, **kwargs) + # raw value is max or int + new._raw_value = value if value == 'max' else _int_value + return new + + @property + def is_opt_max(self): + """Returns True if the optimisation level is "max", False + otherwise.""" + return self._raw_value == "max" + + def __repr__(self): + if isinstance(self._raw_value, str): + arg = f"'{self._raw_value}'" + else: + arg = self._raw_value + return f"_OptLevel({arg})" + + +def _process_opt_level(opt_level): + + if opt_level not in ('0', '1', '2', '3', 'max'): + msg = ("Environment variable `NUMBA_OPT` is set to an unsupported " + f"value '{opt_level}', supported values are 0, 1, 2, 3, and " + "'max'") + raise ValueError(msg) + else: + return _OptLevel(opt_level) + + class _EnvReloader(object): def 
__init__(self): @@ -150,9 +193,11 @@ def _readenv(name, ctor, default): return default() if callable(default) else default try: return ctor(value) - except Exception: - warnings.warn("environ %s defined but failed to parse '%s'" % - (name, value), RuntimeWarning) + except Exception as e: + warnings.warn(f"Environment variable '{name}' is defined but " + f"its associated value '{value}' could not be " + f"parsed.\nThe parse failed with exception: {e}.", + RuntimeWarning) return default def optional_str(x): @@ -259,7 +304,7 @@ def optional_str(x): ("" if str(CPU_NAME).lower() == 'generic' else None)) # Optimization level - OPT = _readenv("NUMBA_OPT", int, 3) + OPT = _readenv("NUMBA_OPT", _process_opt_level, _OptLevel(3)) # Force dump of Python bytecode DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND) diff --git a/numba/tests/test_config.py b/numba/tests/test_config.py index ef0d4efdc4d..fca7780b057 100644 --- a/numba/tests/test_config.py +++ b/numba/tests/test_config.py @@ -2,6 +2,7 @@ import tempfile from textwrap import dedent import unittest +from unittest import mock from numba.tests.support import (TestCase, temp_directory, override_env_config, run_in_subprocess) from numba.core import config @@ -113,11 +114,95 @@ def test_illegal_error_style_handling(self): code = ("from numba import njit\n@njit\ndef foo():\n\t" f"print('{source_compiled}')\nfoo()") out, err = run_in_subprocess(dedent(code), env=new_env) - expected = ("environ NUMBA_CAPTURED_ERRORS defined but failed to parse " - "\'not_a_known_style\'") - self.assertIn(expected, err.decode('utf-8')) + expected = ("Environment variable \'NUMBA_CAPTURED_ERRORS\' is defined " + "but its associated value \'not_a_known_style\' could not " + "be parsed.") + err_msg = err.decode('utf-8') + self.assertIn(expected, err_msg) + ex_expected = ("The parse failed with exception: Invalid style in " + "NUMBA_CAPTURED_ERRORS: not_a_known_style") + self.assertIn(ex_expected, err_msg) 
self.assertIn(source_compiled, out.decode('utf-8')) +class TestNumbaOptLevel(TestCase): + # Tests that the setting of NUMBA_OPT influences the "cheap" module pass. + # Spot checks NUMBA_OPT={'max', '3', '0'} + + def check(self, expected, opt_value, raw_value): + # local imports for state-safety + from numba import config, njit + + # check opt value and its raw_value + self.assertEqual(config.OPT, opt_value) + self.assertEqual(config.OPT._raw_value, raw_value) + + # Patch the CPUCodegen to capture calls to the + # `_module_pass_manager` through a `side_effect` function that asserts + # that the kwargs being passed are as expected per the "NUMBA_OPT" + # level. The `side_effect` function immediately raises with a known + # message to abort further stages of compilation once the check is + # complete. + from numba.core.codegen import CPUCodegen + side_effect_message = "expected side effect" + + def side_effect(*args, **kwargs): + self.assertEqual(kwargs, expected) + raise RuntimeError(side_effect_message) + + with mock.patch.object(CPUCodegen, '_module_pass_manager', + side_effect=side_effect): + with self.assertRaises(RuntimeError) as raises: + njit(lambda : ...)() + + self.assertIn(side_effect_message, str(raises.exception)) + + @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': 'max'}) + def test_opt_max(self): + # NUMBA_OPT='max' should set opt to 3 and enable loop_vectorize + expected = {'loop_vectorize': True, + 'slp_vectorize': False, + 'opt': 3, + 'cost': 'cheap'} + self.check(expected, 3, 'max') + + @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': '3'}) + def test_opt_3(self): + # NUMBA_OPT='3' should not impact opt or loop_vectorize + expected = {'loop_vectorize': False, + 'slp_vectorize': False, + 'opt': 0, + 'cost': 'cheap'} + self.check(expected, 3, 3) + + @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': '0'}) + def test_opt_0(self): + # NUMBA_OPT='0' should not impact opt or loop_vectorize + expected = {'loop_vectorize': False, + 
'slp_vectorize': False, + 'opt': 0, + 'cost': 'cheap'} + self.check(expected, 0, 0) + + @TestCase.run_test_in_subprocess() + def test_opt_default(self): + # NUMBA_OPT is not set, the default should not impact opt or + # loop_vectorize + expected = {'loop_vectorize': False, + 'slp_vectorize': False, + 'opt': 0, + 'cost': 'cheap'} + self.check(expected, 3, 3) + + @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': 'invalid'}) + def test_opt_invalid(self): + # NUMBA_OPT='invalid' should just proceed as default case + expected = {'loop_vectorize': False, + 'slp_vectorize': False, + 'opt': 0, + 'cost': 'cheap'} + self.check(expected, 3, 3) + + if __name__ == '__main__': unittest.main()