Skip to content

Commit

Permalink
Merge pull request numba#9094 from stuartarchibald/fix/8172
Browse files Browse the repository at this point in the history
Add support for a 'max' level to NUMBA_OPT environment variable.
  • Loading branch information
esc authored Aug 2, 2023
2 parents d7a03e0 + c91b102 commit 8fa8ddd
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 10 deletions.
11 changes: 10 additions & 1 deletion docs/source/reference/envvars.rst
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,16 @@ Compilation options

.. envvar:: NUMBA_OPT

The optimization level; this option is passed straight to LLVM.
The optimization level; typically this option is passed straight to LLVM. It
may take one of the values ``0``, ``1``, ``2`` or ``3`` which correspond
approximately to the ``-O{value}`` flag found in many command line
compilation tools. The value ``max`` is also supported; this is Numba
specific and has the effect of running with the optimization level set at
``3`` both before and after a pass in which reference count operation
pruning takes place. In some cases this may increase performance, in other
cases it may impede performance; the same can be said for compilation time.
This option is present to give users the opportunity to choose a value
suitable for their application.

*Default value:* 3

Expand Down
13 changes: 13 additions & 0 deletions docs/upcoming_changes/9094.new_feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@

Add support for value ``max`` to ``NUMBA_OPT``.
===============================================

The optimisation level that Numba applies when compiling can be set through the
environment variable ``NUMBA_OPT``. This has historically been a value between
0 and 3 (inclusive). Support for the value ``max`` has now been added; this is a
Numba-specific optimisation level which indicates that the user would like Numba
to try applying as much optimisation as possible, potentially trading a longer
compilation time for better run-time performance. In practice, use of the ``max``
level of optimisation may or may not benefit the run-time or compile-time
performance of user code, but it has been added to present an easy to access
option for users to try if they so wish.
24 changes: 22 additions & 2 deletions numba/core/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,10 +1187,30 @@ def _init(self, llvm_module):
self._engine = JitEngine(engine)
self._target_data = engine.target_data
self._data_layout = str(self._target_data)
self._mpm_cheap = self._module_pass_manager(loop_vectorize=False,

if config.OPT.is_opt_max:
# If the OPT level is set to 'max' then the user is requesting that
# compilation time is traded for potential performance gain. This
# currently manifests as running the "cheap" pass at -O3
# optimisation level with loop-vectorization enabled. There's no
# guarantee that this will increase runtime performance, it may
# detriment it, this is here to give the user an easily accessible
# option to try.
loopvect = True
opt_level = 3
else:
# The default behaviour is to do an opt=0 pass to try and inline as
# much as possible with the cheapest cost of doing so. This is so
# that the ref-op pruner pass that runs after the cheap pass will
# have the largest possible scope for working on pruning references.
loopvect = False
opt_level = 0

self._mpm_cheap = self._module_pass_manager(loop_vectorize=loopvect,
slp_vectorize=False,
opt=0,
opt=opt_level,
cost="cheap")

self._mpm_full = self._module_pass_manager()

self._engine.set_object_cache(self._library_class._object_compiled_hook,
Expand Down
53 changes: 49 additions & 4 deletions numba/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,49 @@ def _validate_captured_errors_style(style_str):
return rendered_style


class _OptLevel(int):
    """Hold the "optimisation level" set in `NUMBA_OPT`.

    As this env var can be an int or a string, but is almost always
    interpreted as an int, this class subclasses int so as to get the common
    behaviour but stores the actual value as a `_raw_value` member. The value
    "max" is a special case: its integer value is 3 and the property
    `is_opt_max` can be queried to find if the optimisation level (the value
    supplied at construction time) is "max".
    """

    def __new__(cls, *args, **kwargs):
        """Create the level from a single positional value.

        The value is either the string 'max' or anything accepted by
        ``int()``. Any keyword arguments are forwarded to the ``int``
        constructor.

        Raises ``TypeError`` if the number of positional arguments is not
        exactly one.
        """
        # Checked explicitly rather than with `assert` so that the check is
        # still performed when Python runs with `-O`.
        if len(args) != 1:
            msg = (f"{cls.__name__} expects exactly 1 positional argument, "
                   f"got {len(args)}")
            raise TypeError(msg)
        (value,) = args
        is_max = value == 'max'
        int_value = 3 if is_max else int(value)
        # the int ctor is always called with an appropriate integer value
        new = super().__new__(cls, int_value, **kwargs)
        # raw value is 'max' or the converted integer
        new._raw_value = value if is_max else int_value
        return new

    @property
    def is_opt_max(self):
        """Return True if the optimisation level is "max", False
        otherwise."""
        return self._raw_value == "max"

    def __repr__(self):
        # Quote the raw value when it is the string form so that the repr
        # reads like the constructor call that produced this instance.
        if isinstance(self._raw_value, str):
            arg = f"'{self._raw_value}'"
        else:
            arg = self._raw_value
        return f"_OptLevel({arg})"


def _process_opt_level(opt_level):
    """Validate the raw ``NUMBA_OPT`` string and wrap it in `_OptLevel`.

    Only the exact strings '0', '1', '2', '3' and 'max' are accepted; any
    other value raises a ``ValueError`` naming the offending value.
    """
    supported = ('0', '1', '2', '3', 'max')
    if opt_level in supported:
        return _OptLevel(opt_level)

    msg = ("Environment variable `NUMBA_OPT` is set to an unsupported "
           f"value '{opt_level}', supported values are 0, 1, 2, 3, and "
           "'max'")
    raise ValueError(msg)


class _EnvReloader(object):

def __init__(self):
Expand Down Expand Up @@ -150,9 +193,11 @@ def _readenv(name, ctor, default):
return default() if callable(default) else default
try:
return ctor(value)
except Exception:
warnings.warn("environ %s defined but failed to parse '%s'" %
(name, value), RuntimeWarning)
except Exception as e:
warnings.warn(f"Environment variable '{name}' is defined but "
f"its associated value '{value}' could not be "
f"parsed.\nThe parse failed with exception: {e}.",
RuntimeWarning)
return default

def optional_str(x):
Expand Down Expand Up @@ -259,7 +304,7 @@ def optional_str(x):
("" if str(CPU_NAME).lower() == 'generic'
else None))
# Optimization level
OPT = _readenv("NUMBA_OPT", int, 3)
OPT = _readenv("NUMBA_OPT", _process_opt_level, _OptLevel(3))

# Force dump of Python bytecode
DUMP_BYTECODE = _readenv("NUMBA_DUMP_BYTECODE", int, DEBUG_FRONTEND)
Expand Down
91 changes: 88 additions & 3 deletions numba/tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import tempfile
from textwrap import dedent
import unittest
from unittest import mock
from numba.tests.support import (TestCase, temp_directory, override_env_config,
run_in_subprocess)
from numba.core import config
Expand Down Expand Up @@ -113,11 +114,95 @@ def test_illegal_error_style_handling(self):
code = ("from numba import njit\n@njit\ndef foo():\n\t"
f"print('{source_compiled}')\nfoo()")
out, err = run_in_subprocess(dedent(code), env=new_env)
expected = ("environ NUMBA_CAPTURED_ERRORS defined but failed to parse "
"\'not_a_known_style\'")
self.assertIn(expected, err.decode('utf-8'))
expected = ("Environment variable \'NUMBA_CAPTURED_ERRORS\' is defined "
"but its associated value \'not_a_known_style\' could not "
"be parsed.")
err_msg = err.decode('utf-8')
self.assertIn(expected, err_msg)
ex_expected = ("The parse failed with exception: Invalid style in "
"NUMBA_CAPTURED_ERRORS: not_a_known_style")
self.assertIn(ex_expected, err_msg)
self.assertIn(source_compiled, out.decode('utf-8'))


class TestNumbaOptLevel(TestCase):
    # Tests that the setting of NUMBA_OPT influences the "cheap" module pass.
    # Spot checks NUMBA_OPT={'max', '3', '0'}, the unset default and an
    # invalid value.

    @staticmethod
    def _cheap_pass_kwargs(loop_vectorize=False, opt=0):
        # Build the kwargs that the "cheap" module pass manager is expected
        # to be created with. The defaults describe the behaviour for every
        # NUMBA_OPT value other than 'max'.
        return {'loop_vectorize': loop_vectorize,
                'slp_vectorize': False,
                'opt': opt,
                'cost': 'cheap'}

    def check(self, expected, opt_value, raw_value):
        # local imports for state-safety
        from numba import config, njit
        from numba.core.codegen import CPUCodegen

        # the parsed level must compare equal to the expected int and must
        # remember the value it was built from
        opt = config.OPT
        self.assertEqual(opt, opt_value)
        self.assertEqual(opt._raw_value, raw_value)

        # `CPUCodegen._module_pass_manager` is patched with a function that
        # asserts the kwargs it receives match those expected for the
        # "NUMBA_OPT" level under test, and then immediately raises with a
        # known message so that no further compilation stages run once the
        # check is complete.
        sentinel = "expected side effect"

        def assert_kwargs_then_abort(*args, **kwargs):
            self.assertEqual(kwargs, expected)
            raise RuntimeError(sentinel)

        patcher = mock.patch.object(CPUCodegen, '_module_pass_manager',
                                    side_effect=assert_kwargs_then_abort)
        with patcher:
            with self.assertRaises(RuntimeError) as raises:
                njit(lambda : ...)()

        self.assertIn(sentinel, str(raises.exception))

    @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': 'max'})
    def test_opt_max(self):
        # NUMBA_OPT='max' should set opt to 3 and enable loop_vectorize
        expected = self._cheap_pass_kwargs(loop_vectorize=True, opt=3)
        self.check(expected, 3, 'max')

    @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': '3'})
    def test_opt_3(self):
        # NUMBA_OPT='3' should not impact opt or loop_vectorize
        self.check(self._cheap_pass_kwargs(), 3, 3)

    @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': '0'})
    def test_opt_0(self):
        # NUMBA_OPT='0' should not impact opt or loop_vectorize
        self.check(self._cheap_pass_kwargs(), 0, 0)

    @TestCase.run_test_in_subprocess()
    def test_opt_default(self):
        # NUMBA_OPT is not set, the default should not impact opt or
        # loop_vectorize
        self.check(self._cheap_pass_kwargs(), 3, 3)

    @TestCase.run_test_in_subprocess(envvars={'NUMBA_OPT': 'invalid'})
    def test_opt_invalid(self):
        # NUMBA_OPT='invalid' should just proceed as default case
        self.check(self._cheap_pass_kwargs(), 3, 3)


if __name__ == '__main__':
unittest.main()

0 comments on commit 8fa8ddd

Please sign in to comment.