From 53d768fcedde68c8638bc549844f17c8106d18b4 Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Tue, 25 Jul 2023 13:07:43 +0100 Subject: [PATCH 1/3] Add support for a 'max' level to NUMBA_OPT environment variable. This adds support for a new 'max' level to the NUMBA_OPT environment variable. Prior to this values 0, 1, 2 and 3 were supported, these correspond approximately to the -O{level} flag available in many command line based compilation tools. Issue opt=3 both before and after the reference count pruning pass is run (Numba historically had this behaviour as default). This patch re-enables this behaviour and highlights the caveats in the docs (performance may not improve, compilation time may increase and there may be no performance gain, but the opposite is also possible, the user can now choose!). Closes #8172 --- docs/source/reference/envvars.rst | 11 +++- docs/upcoming_changes/9094.new_feature.rst | 13 ++++ numba/core/codegen.py | 24 +++++++- numba/core/config.py | 46 ++++++++++++-- numba/tests/test_config.py | 71 ++++++++++++++++++++++ 5 files changed, 158 insertions(+), 7 deletions(-) create mode 100644 docs/upcoming_changes/9094.new_feature.rst diff --git a/docs/source/reference/envvars.rst b/docs/source/reference/envvars.rst index 53aecd12848..eb606ff2a82 100644 --- a/docs/source/reference/envvars.rst +++ b/docs/source/reference/envvars.rst @@ -299,7 +299,16 @@ Compilation options .. envvar:: NUMBA_OPT - The optimization level; this option is passed straight to LLVM. + The optimization level; typically this option is passed straight to LLVM. It + may take one of the values ``0``, ``1``, ``2`` or ``3`` which correspond + approximately to the ``-O{value}`` flag found in many command line + compilation tools. The value ``max`` is also supported, this is Numba + specific, it has the effect of running with the optimization level set at + ``3`` both before and after a pass in which reference count operation + pruning takes place. 
In some cases this may increase performance, in other + cases it may impede performance, the same can be said for compilation time. + This option is present to give users the opportunity to choose a value + suitable for their application. *Default value:* 3 diff --git a/docs/upcoming_changes/9094.new_feature.rst b/docs/upcoming_changes/9094.new_feature.rst new file mode 100644 index 00000000000..708f6e97824 --- /dev/null +++ b/docs/upcoming_changes/9094.new_feature.rst @@ -0,0 +1,13 @@ + +Add support for value ``max`` to ``NUMBA_OPT``. +=============================================== + +The optimisation level that Numba applies when compiling can be set through the +environment variable ``NUMBA_OPT``. This has historically been a value between +0 and 3 (inclusive). Support for the value ``max`` has now been added, this is a +Numba-specific optimisation level which will result in Numba running a -O3-like +optimistion before reference counting operations are pruned and then running +another -O3-like optimisation afterwards. This may or may not benefit the +run-time or compile-time performance of user code, but it has been added as it +is similar to the compilation behaviour prior to Numba 0.54 and some users found +it leads to better performance for their programs. diff --git a/numba/core/codegen.py b/numba/core/codegen.py index ced9e87aac7..3dc9daa17b9 100644 --- a/numba/core/codegen.py +++ b/numba/core/codegen.py @@ -1187,10 +1187,30 @@ def _init(self, llvm_module): self._engine = JitEngine(engine) self._target_data = engine.target_data self._data_layout = str(self._target_data) - self._mpm_cheap = self._module_pass_manager(loop_vectorize=False, + + if config.OPT._raw_value == 'max': + # If the OPT level is set to 'max' then run the cheap pass at O3 + # with loop-vectorize enabled. This _may_ result in more optimised + # code, but it also may have the opposite effect. It may also + # increase compilation time, but also may have the opposite effect. 
+ # This behaviour is present so that users can choose what's + # appropriate for their application if they wish to, but there's a + # reasonable default present. + loopvect = True + opt_level = 3 + else: + # The default behaviour is to do an opt=0 pass to try and inline as + # much as possible with the cheapest cost of doing so. This is so + # that the ref-op pruner pass that runs after the cheap pass will + # have the largest possible scope for working on pruning references. + loopvect = False + opt_level = 0 + + self._mpm_cheap = self._module_pass_manager(loop_vectorize=loopvect, slp_vectorize=False, - opt=0, + opt=opt_level, cost="cheap") + self._mpm_full = self._module_pass_manager() self._engine.set_object_cache(self._library_class._object_compiled_hook, diff --git a/numba/core/config.py b/numba/core/config.py index 6c25bc21045..8a9588ea48a 100644 --- a/numba/core/config.py +++ b/numba/core/config.py @@ -80,6 +80,42 @@ def _validate_captured_errors_style(style_str): return rendered_style +class _OptLevel(int): + """This class holds the "optimisation level" set in `NUMBA_OPT`. As this env + var can be an int or a string, but is almost always interpreted as an int. 
+ This class subclasses int so as to get the common behaviour but stores the + actual value as a `_raw_value` member to make it available for cases where + accounting for the specific string supplied by the user is necessary.""" + + def __new__(cls, *args, **kwargs): + assert len(args) == 1 + (value,) = args + _int_value = 3 if value == 'max' else int(value) + # the int ctor is always called with an appropriate integer value + new = super().__new__(cls, _int_value, **kwargs) + # raw value is max or int + new._raw_value = value if value == 'max' else _int_value + return new + + def __repr__(self): + if isinstance(self._raw_value, str): + arg = f"'{self._raw_value}'" + else: + arg = self._raw_value + return f"_OptLevel({arg})" + + +def _process_opt_level(opt_level): + + if opt_level not in ('0', '1', '2', '3', 'max'): + msg = ("Environment variable `NUMBA_OPT` is set to an unsupported " + f"value '{opt_level}', supported values are 0, 1, 2, 3, and " + "'max'") + raise ValueError(msg) + else: + return _OptLevel(opt_level) + + class _EnvReloader(object): def __init__(self): @@ -150,9 +186,11 @@ def _readenv(name, ctor, default): return default() if callable(default) else default try: return ctor(value) - except Exception: - warnings.warn("environ %s defined but failed to parse '%s'" % - (name, value), RuntimeWarning) + except Exception as e: + warnings.warn(f"Environment variable '{name}' is defined but " + f"its associated value '{value}' could not be " + f"parsed.\nThe parse failed with exception: {e}.", + RuntimeWarning) return default def optional_str(x): @@ -259,7 +297,7 @@ def optional_str(x): ("" if str(CPU_NAME).lower() == 'generic' else None)) # Optimization level - OPT = _readenv("NUMBA_OPT", int, 3) + OPT = _readenv("NUMBA_OPT", _process_opt_level, _OptLevel(3)) # Force dump of Python bytecode DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND) diff --git a/numba/tests/test_config.py b/numba/tests/test_config.py index 
ef0d4efdc4d..fbbce420fd9 100644 --- a/numba/tests/test_config.py +++ b/numba/tests/test_config.py @@ -2,6 +2,7 @@ import tempfile from textwrap import dedent import unittest +from unittest import mock from numba.tests.support import (TestCase, temp_directory, override_env_config, run_in_subprocess) from numba.core import config @@ -119,5 +120,75 @@ def test_illegal_error_style_handling(self): self.assertIn(source_compiled, out.decode('utf-8')) +class TestNumbaOptLevel(TestCase): + # Tests that the setting of NUMBA_OPT influences the "cheap" module pass. + # Spot checks NUMBA_OPT={'max', '3', '0'} + + def check(self, expected, opt_value, raw_value): + # local imports for state-safety + from numba import config, njit + + # check opt value and its raw_value + self.assertEqual(config.OPT, opt_value) + self.assertEqual(config.OPT._raw_value, raw_value) + + # Patch the CPUCodegen to capture calls to the + # `_module_pass_manager` through a `side_effect` function that asserts + # that the kwargs being passed are as expected per the "NUMBA_OPT" + # level. The `side_effect` function immediately raises with a known + # message to abort further stages of compilation once the check is + # complete. 
+ from numba.core.codegen import CPUCodegen + side_effect_message = "expected side effect" + + def side_effect(*args, **kwargs): + self.assertEqual(kwargs, expected) + raise RuntimeError(side_effect_message) + + with mock.patch.object(CPUCodegen, '_module_pass_manager', + side_effect=side_effect): + with self.assertRaises(RuntimeError) as raises: + njit(lambda : ...)() + + self.assertIn(side_effect_message, str(raises.exception)) + + @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': 'max'}) + def test_opt_max(self): + # NUMBA_OPT='max' should set opt to 3 and enable loop_vectorize + expected = {'loop_vectorize': True, + 'slp_vectorize': False, + 'opt': 3, + 'cost': 'cheap'} + self.check(expected, 3, 'max') + + @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': '3'}) + def test_opt_3(self): + # NUMBA_OPT='3' should not impact opt or loop_vectorize + expected = {'loop_vectorize': False, + 'slp_vectorize': False, + 'opt': 0, + 'cost': 'cheap'} + self.check(expected, 3, 3) + + @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': '0'}) + def test_opt_0(self): + # NUMBA_OPT='0' should not impact opt or loop_vectorize + expected = {'loop_vectorize': False, + 'slp_vectorize': False, + 'opt': 0, + 'cost': 'cheap'} + self.check(expected, 0, 0) + + @TestCase.run_test_in_subprocess() + def test_opt_default(self): + # NUMBA_OPT is not set, the default should not impact opt or + # loop_vectorize + expected = {'loop_vectorize': False, + 'slp_vectorize': False, + 'opt': 0, + 'cost': 'cheap'} + self.check(expected, 3, 3) + + if __name__ == '__main__': unittest.main() From c6b226bfbb57af113f0b988dc3126e508bb9a699 Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Wed, 26 Jul 2023 17:06:20 +0100 Subject: [PATCH 2/3] Fix test due to error message change. As title. 
--- numba/tests/test_config.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/numba/tests/test_config.py b/numba/tests/test_config.py index fbbce420fd9..374253aa8b4 100644 --- a/numba/tests/test_config.py +++ b/numba/tests/test_config.py @@ -114,9 +114,14 @@ def test_illegal_error_style_handling(self): code = ("from numba import njit\n@njit\ndef foo():\n\t" f"print('{source_compiled}')\nfoo()") out, err = run_in_subprocess(dedent(code), env=new_env) - expected = ("environ NUMBA_CAPTURED_ERRORS defined but failed to parse " - "\'not_a_known_style\'") - self.assertIn(expected, err.decode('utf-8')) + expected = ("Environment variable \'NUMBA_CAPTURED_ERRORS\' is defined " + "but its associated value \'not_a_known_style\' could not " + "be parsed.") + err_msg = err.decode('utf-8') + self.assertIn(expected, err_msg) + ex_expected = ("The parse failed with exception: Invalid style in " + "NUMBA_CAPTURED_ERRORS: not_a_known_style") + self.assertIn(ex_expected, err_msg) self.assertIn(source_compiled, out.decode('utf-8')) From c91b102d8bd25c15d471eca6bfffb45e87296aee Mon Sep 17 00:00:00 2001 From: Stuart Archibald Date: Thu, 27 Jul 2023 11:16:06 +0100 Subject: [PATCH 3/3] Respond to review. * Update text to reflect that "max" optimisation is trading potentially longer compilation time for potentially better run-time but without being specific about how this is implemented. * Add `is_opt_max` property to `_OptLevel` to avoid attr access. * Add unit test to ensure that invalid opt level behaves as default. 
--- docs/upcoming_changes/9094.new_feature.rst | 12 ++++++------ numba/core/codegen.py | 16 ++++++++-------- numba/core/config.py | 15 +++++++++++---- numba/tests/test_config.py | 9 +++++++++ 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/docs/upcoming_changes/9094.new_feature.rst b/docs/upcoming_changes/9094.new_feature.rst index 708f6e97824..0cf7938f229 100644 --- a/docs/upcoming_changes/9094.new_feature.rst +++ b/docs/upcoming_changes/9094.new_feature.rst @@ -5,9 +5,9 @@ Add support for value ``max`` to ``NUMBA_OPT``. The optimisation level that Numba applies when compiling can be set through the environment variable ``NUMBA_OPT``. This has historically been a value between 0 and 3 (inclusive). Support for the value ``max`` has now been added, this is a -Numba-specific optimisation level which will result in Numba running a -O3-like -optimistion before reference counting operations are pruned and then running -another -O3-like optimisation afterwards. This may or may not benefit the -run-time or compile-time performance of user code, but it has been added as it -is similar to the compilation behaviour prior to Numba 0.54 and some users found -it leads to better performance for their programs. +Numba-specific optimisation level which indicates that the user would like Numba +to try running the most optimisation possible, potentially trading a longer +compilation time for better run-time performance. In practice, use of the ``max`` +level of optimisation may or may not benefit the run-time or compile-time +performance of user code, but it has been added to present an easy to access +option for users to try if they so wish. 
diff --git a/numba/core/codegen.py b/numba/core/codegen.py index 3dc9daa17b9..28ab618a0b0 100644 --- a/numba/core/codegen.py +++ b/numba/core/codegen.py @@ -1188,14 +1188,14 @@ def _init(self, llvm_module): self._target_data = engine.target_data self._data_layout = str(self._target_data) - if config.OPT._raw_value == 'max': - # If the OPT level is set to 'max' then run the cheap pass at O3 - # with loop-vectorize enabled. This _may_ result in more optimised - # code, but it also may have the opposite effect. It may also - # increase compilation time, but also may have the opposite effect. - # This behaviour is present so that users can choose what's - # appropriate for their application if they wish to, but there's a - # reasonable default present. + if config.OPT.is_opt_max: + # If the OPT level is set to 'max' then the user is requesting that + # compilation time is traded for potential performance gain. This + # currently manifests as running the "cheap" pass at -O3 + # optimisation level with loop-vectorization enabled. There's no + # guarantee that this will increase runtime performance, it may + # detriment it, this is here to give the user an easily accessible + # option to try. loopvect = True opt_level = 3 else: diff --git a/numba/core/config.py b/numba/core/config.py index 8a9588ea48a..c2edf63984d 100644 --- a/numba/core/config.py +++ b/numba/core/config.py @@ -82,10 +82,11 @@ def _validate_captured_errors_style(style_str): class _OptLevel(int): """This class holds the "optimisation level" set in `NUMBA_OPT`. As this env - var can be an int or a string, but is almost always interpreted as an int. 
- This class subclasses int so as to get the common behaviour but stores the - actual value as a `_raw_value` member to make it available for cases where - accounting for the specific string supplied by the user is necessary.""" + var can be an int or a string, but is almost always interpreted as an int, + this class subclasses int so as to get the common behaviour but stores the + actual value as a `_raw_value` member. The value "max" is a special case + and the property `is_opt_max` can be queried to find if the optimisation + level (supplied value at construction time) is "max".""" def __new__(cls, *args, **kwargs): assert len(args) == 1 @@ -97,6 +98,12 @@ def __new__(cls, *args, **kwargs): new._raw_value = value if value == 'max' else _int_value return new + @property + def is_opt_max(self): + """Returns True if the optimisation level is "max", False + otherwise.""" + return self._raw_value == "max" + def __repr__(self): if isinstance(self._raw_value, str): arg = f"'{self._raw_value}'" diff --git a/numba/tests/test_config.py b/numba/tests/test_config.py index 374253aa8b4..fca7780b057 100644 --- a/numba/tests/test_config.py +++ b/numba/tests/test_config.py @@ -194,6 +194,15 @@ def test_opt_default(self): 'cost': 'cheap'} self.check(expected, 3, 3) + @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': 'invalid'}) + def test_opt_invalid(self): + # NUMBA_OPT='invalid' should just proceed as default case + expected = {'loop_vectorize': False, + 'slp_vectorize': False, + 'opt': 0, + 'cost': 'cheap'} + self.check(expected, 3, 3) + if __name__ == '__main__': unittest.main()