Skip to content

Commit

Permalink
Merge pull request numba#8535 from apmasell/llvm14
Browse files Browse the repository at this point in the history
LLVM14
  • Loading branch information
sklam authored Mar 13, 2023
2 parents ce69f30 + 03cf859 commit c66a986
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 56 deletions.
2 changes: 1 addition & 1 deletion buildscripts/condarecipe.local/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ requirements:
- setuptools
- importlib_metadata # [py<39]
# On channel https://anaconda.org/numba/
- llvmlite >=0.40.0dev0,<0.40
- llvmlite >=0.40,<0.41
# TBB devel version is to match TBB libs.
# 2020.3 is the last version with the "old" ABI
# NOTE: 2021.1..2021.5 are API compatible for Numba's purposes.
Expand Down
24 changes: 11 additions & 13 deletions numba/core/callconv.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from llvmlite import ir

from numba.core import types, cgutils
from numba.core.base import PYOBJECT, GENERIC_POINTER
from numba.core.base import GENERIC_POINTER


TryStatus = namedtuple('TryStatus', ['in_try', 'excinfo'])
Expand Down Expand Up @@ -90,7 +90,7 @@ def return_native_none(self, builder):
self._return_errcode_raw(builder, RETCODE_NONE)

def return_exc(self, builder):
self._return_errcode_raw(builder, RETCODE_EXC, mark_exc=True)
self._return_errcode_raw(builder, RETCODE_EXC)

def return_stop_iteration(self, builder):
self._return_errcode_raw(builder, RETCODE_STOPIT)
Expand Down Expand Up @@ -207,12 +207,12 @@ def return_user_exc(self, builder, exc, exc_args=None, loc=None,

call_helper = self._get_call_helper(builder)
exc_id = call_helper._add_exception(exc, exc_args, locinfo)
self._return_errcode_raw(builder, _const_int(exc_id), mark_exc=True)
self._return_errcode_raw(builder, _const_int(exc_id))

def return_status_propagate(self, builder, status):
self._return_errcode_raw(builder, status.code)

def _return_errcode_raw(self, builder, code, mark_exc=False):
def _return_errcode_raw(self, builder, code):
if isinstance(code, int):
code = _const_int(code)
builder.ret(code)
Expand Down Expand Up @@ -395,7 +395,9 @@ def set_static_user_exc(self, builder, exc, exc_args=None, loc=None,
exc = (exc, exc_args, locinfo)
struct_gv = pyapi.serialize_object(exc)
excptr = self._get_excinfo_argument(builder.function)
builder.store(struct_gv, excptr)
store = builder.store(struct_gv, excptr)
md = builder.module.add_metadata([ir.IntType(1)(1)])
store.set_metadata("numba_exception_output", md)

def return_user_exc(self, builder, exc, exc_args=None, loc=None,
func_name=None):
Expand All @@ -409,7 +411,7 @@ def return_user_exc(self, builder, exc, exc_args=None, loc=None,
builder.branch(try_info['target'])
else:
# Return from the current function
self._return_errcode_raw(builder, RETCODE_USEREXC, mark_exc=True)
self._return_errcode_raw(builder, RETCODE_USEREXC)

def _get_try_state(self, builder):
try:
Expand Down Expand Up @@ -457,14 +459,10 @@ def return_status_propagate(self, builder, status):
excptr = self._get_excinfo_argument(builder.function)
builder.store(status.excinfoptr, excptr)
with builder.if_then(builder.not_(trystatus.in_try)):
self._return_errcode_raw(builder, status.code, mark_exc=True)
self._return_errcode_raw(builder, status.code)

def _return_errcode_raw(self, builder, code, mark_exc=False):
ret = builder.ret(code)

if mark_exc:
md = builder.module.add_metadata([ir.IntType(1)(1)])
ret.set_metadata("ret_is_raise", md)
def _return_errcode_raw(self, builder, code):
builder.ret(code)

def _get_return_status(self, builder, code, excinfoptr):
"""
Expand Down
25 changes: 16 additions & 9 deletions numba/core/codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def init_digraph(name, fname, fontsize):
nrt_meminfo = re.compile("@NRT_MemInfo")
ll_intrin_calls = re.compile(r".*call.*@llvm\..*")
ll_function_call = re.compile(r".*call.*@.*")
ll_raise = re.compile(r"ret i32.*\!ret_is_raise.*")
ll_raise = re.compile(r"store .*\!numba_exception_output.*")
ll_return = re.compile("ret i32 [^1],?.*")

# wrapper function for line wrapping LLVM lines
Expand Down Expand Up @@ -1215,14 +1215,21 @@ def _module_pass_manager(self, **kwargs):
# This knocks loops into rotated form early to reduce the likelihood
# of vectorization failing due to unknown PHI nodes.
pm.add_loop_rotate_pass()
# LLVM 11 added LFTR to the IV Simplification pass, this interacted
# badly with the existing use of the InstructionCombiner here and
# ended up with PHI nodes that prevented vectorization from
# working. The desired vectorization effects can be achieved
# with this in LLVM 11 (and also < 11) but at a potentially
# slightly higher cost:
pm.add_licm_pass()
pm.add_cfg_simplification_pass()
if ll.llvm_version_info[0] < 12:
# LLVM 11 added LFTR to the IV Simplification pass,
# this interacted badly with the existing use of the
# InstructionCombiner here and ended up with PHI nodes that
# prevented vectorization from working. The desired
# vectorization effects can be achieved with this in LLVM 11
# (and also < 11) but at a potentially slightly higher cost:
pm.add_licm_pass()
pm.add_cfg_simplification_pass()
else:
# These passes are required to get SVML to vectorize tests
# properly on LLVM 14
pm.add_instruction_combining_pass()
pm.add_jump_threading_pass()

if config.LLVM_REFPRUNE_PASS:
pm.add_refprune_pass(_parse_refprune_flags())
return pm
Expand Down
15 changes: 11 additions & 4 deletions numba/misc/llvm_pass_timings.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def get(self):
"wall_time",
"wall_percent",
"pass_name",
"instruction",
],
)

Expand Down Expand Up @@ -216,6 +217,7 @@ def parse(raw_data):
"System Time": "system",
"User+System": "user_system",
"Wall Time": "wall",
"Instr": "instruction",
"Name": "pass_name",
}
for ln in line_iter:
Expand All @@ -229,17 +231,22 @@ def parse(raw_data):
assert headers[-1] == 'pass_name'
# compute the list of available attributes from the column headers
attrs = []
n = r"\s*((?:[0-9]+\.)?[0-9]+)"
pat = ""
for k in headers[:-1]:
attrs.append(f"{k}_time")
attrs.append(f"{k}_percent")
if k == "instruction":
pat += n
else:
attrs.append(f"{k}_time")
attrs.append(f"{k}_percent")
pat += f"\\s+(?:{n}\\s*\\({n}%\\)|-+)"

# put default value 0.0 to all missing attributes
missing = {}
for k in PassTimingRecord._fields:
if k not in attrs and k != 'pass_name':
missing[k] = 0.0
# parse timings
n = r"\s*((?:[0-9]+\.)?[0-9]+)"
pat = f"\\s+(?:{n}\\s*\\({n}%\\)|-+)" * (len(headers) - 1)
pat += r"\s*(.*)"
for ln in line_iter:
m = re.match(pat, ln)
Expand Down
5 changes: 5 additions & 0 deletions numba/tests/test_array_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,8 @@ def assert_raises(arr, axis):
assert_raises(arr1d, -2)
assert_raises(arr2d, -3)
assert_raises(arr2d, 2)
# Exceptions leak references
self.disable_leak_check()

def test_argmax_axis_must_be_integer(self):
arr = np.arange(6)
Expand Down Expand Up @@ -1073,6 +1075,9 @@ def assert_raises(arr, axis):
assert_raises(arr2d, -3)
assert_raises(arr2d, 2)

# Exceptions leak references
self.disable_leak_check()

def test_argmin_axis_must_be_integer(self):
arr = np.arange(6)

Expand Down
15 changes: 11 additions & 4 deletions numba/tests/test_debuginfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def test_DILocation(self):
@njit(debug=True, error_model='numpy')
def foo(a):
b = a + 1.23
c = a * 2.34
c = b * 2.34
d = b / c
print(d)
return d
Expand Down Expand Up @@ -223,9 +223,16 @@ def foo(a):

# Find non-call instr and check the sequence is as expected
instrs = [x for x in block.instructions if x.opcode != 'call']
op_seq = [x.opcode for x in instrs]
op_expect = ('fadd', 'fmul', 'fdiv')
self.assertIn(''.join(op_expect), ''.join(op_seq))
op_expect = {'fadd', 'fmul', 'fdiv'}
started = False
for x in instrs:
if x.opcode in op_expect:
op_expect.remove(x.opcode)
if not started:
started = True
elif op_expect and started:
self.fail("Math opcodes are not contiguous")
self.assertFalse(op_expect, "Math opcodes were not found")

# Parse out metadata from end of each line, check it monotonically
# ascends with LLVM source line. Also store all the dbg references,
Expand Down
14 changes: 14 additions & 0 deletions numba/tests/test_np_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,8 @@ def test_sinc_exceptions(self):
cfunc('str')
self.assertIn('Argument "x" must be a Number or array-like',
str(raises.exception))
# Exceptions leak references
self.disable_leak_check()

def test_contains(self):
def arrs():
Expand Down Expand Up @@ -650,6 +652,8 @@ def test_angle_exceptions(self):
cfunc('hello')
self.assertIn('Argument "z" must be a complex or Array[complex]',
str(raises.exception))
# Exceptions leak references
self.disable_leak_check()

def test_array_equal(self):
def arrays():
Expand Down Expand Up @@ -777,6 +781,8 @@ def test_np_append_exceptions(self):
'The third argument "axis" must be an integer',
str(raises.exception)
)
# Exceptions leak references
self.disable_leak_check()

def test_delete(self):

Expand Down Expand Up @@ -840,6 +846,8 @@ def test_delete_exceptions(self):
'obj must be less than the len(arr)',
str(raises.exception),
)
# Exceptions leak references
self.disable_leak_check()

def diff_arrays(self):
"""
Expand Down Expand Up @@ -893,6 +901,8 @@ def test_diff2_exceptions(self):
with self.assertRaises(ValueError) as raises:
cfunc(arr, n)
self.assertIn("order must be non-negative", str(raises.exception))
# Exceptions leak references
self.disable_leak_check()

def test_isscalar(self):
def values():
Expand Down Expand Up @@ -1097,6 +1107,8 @@ def test_bincount1_exceptions(self):
cfunc([2, -1])
self.assertIn("first argument must be non-negative",
str(raises.exception))
# Exceptions leak references
self.disable_leak_check()

def test_bincount2(self):
pyfunc = bincount2
Expand Down Expand Up @@ -5299,6 +5311,8 @@ def not_literal_axis(a, i, axis):
with self.assertRaises(ValueError) as raises:
gen(0)(arr2d, np.ones((2, 3), dtype=np.uint64))
self.assertIn("dimensions don't match", str(raises.exception))
# Exceptions leak references
self.disable_leak_check()

def test_nan_to_num(self):
# Test cases are from
Expand Down
6 changes: 6 additions & 0 deletions numba/tests/test_np_randomgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,8 @@ def test_noncentral_chisquare(self):
curr_args[2] = -1
nb_dist_func(*curr_args)
self.assertIn('nonc < 0', str(raises.exception))
# Exceptions leak references
self.disable_leak_check()

def test_noncentral_f(self):
# For this test dtype argument is never used, so we pass [None] as dtype
Expand Down Expand Up @@ -1141,6 +1143,8 @@ def test_noncentral_f(self):
curr_args[3] = -1
nb_dist_func(*curr_args)
self.assertIn('nonc < 0', str(raises.exception))
# Exceptions leak references
self.disable_leak_check()

def test_logseries(self):
# For this test dtype argument is never used, so we pass [None] as dtype
Expand Down Expand Up @@ -1173,6 +1177,8 @@ def test_logseries(self):
curr_args[1] = _p
nb_dist_func(*curr_args)
self.assertIn('p < 0, p >= 1 or p is NaN', str(raises.exception))
# Exceptions leak references
self.disable_leak_check()


class TestGeneratorCaching(TestCase, SerialMixin):
Expand Down
18 changes: 4 additions & 14 deletions numba/tests/test_parfors.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ def _get_fast_instructions(ir):
return fast_inst

def _assert_fast(instrs):
ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp')
ops = ('fadd', 'fsub', 'fmul', 'fdiv', 'frem', 'fcmp', 'call')
for inst in instrs:
count = 0
for op in ops:
Expand Down Expand Up @@ -4462,11 +4462,6 @@ def get_gufunc_asm(self, func, schedule_type, *args, **kwargs):

return asm

# this is a common match pattern for something like:
# \n\tvsqrtpd\t-192(%rbx,%rsi,8), %zmm0\n
# to check vsqrtpd operates on zmm
match_vsqrtpd_on_zmm = re.compile('\n\s+vsqrtpd\s+.*zmm.*\n')

@linux_only
def test_vectorizer_fastmath_asm(self):
""" This checks that if fastmath is set and the underlying hardware
Expand All @@ -4490,22 +4485,19 @@ def will_vectorize(A):
fastmath=True)
slow_asm = self.get_gufunc_asm(will_vectorize, 'unsigned', arg,
fastmath=False)

for v in fast_asm.values():
# should unwind and call vector sqrt then vector add
# all on packed doubles using zmm's
self.assertTrue('vaddpd' in v)
self.assertTrue('vsqrtpd' in v)
self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v)
self.assertTrue('zmm' in v)
# make sure vsqrtpd operates on zmm
self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1)

for v in slow_asm.values():
# vector variants should not be present
self.assertTrue('vaddpd' not in v)
self.assertTrue('vsqrtpd' not in v)
# check scalar variant is present
self.assertTrue('vsqrtsd' in v)
self.assertTrue('vsqrtsd' in v and '__svml_sqrt' not in v)
self.assertTrue('vaddsd' in v)
# check no zmm addressing is present
self.assertTrue('zmm' not in v)
Expand Down Expand Up @@ -4550,11 +4542,9 @@ def will_vectorize(A):
for v in vec_asm.values():
# should unwind and call vector sqrt then vector mov
# all on packed doubles using zmm's
self.assertTrue('vsqrtpd' in v)
self.assertTrue('vsqrtpd' in v or '__svml_sqrt' in v)
self.assertTrue('vmovupd' in v)
self.assertTrue('zmm' in v)
# make sure vsqrtpd operates on zmm
self.assertTrue(len(self.match_vsqrtpd_on_zmm.findall(v)) > 1)

@linux_only
# needed as 32bit doesn't have equivalent signed/unsigned instruction
Expand Down
16 changes: 16 additions & 0 deletions numba/tests/test_refop_pruning.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,22 @@ def func(n):
raise ValueError
return x

with set_refprune_flags('per_bb,fanout'):
self.check(func, (types.intp), basicblock=True, diamond=False,
fanout=True, fanout_raise=False)

def test_fanout_3(self):
# fanout with raise
def func(n):
ary = np.arange(n)
# basically an impl of array.sum
c = 0
# The raise is from StopIteration of next(iterator) implicit in
# the for loop
for v in np.nditer(ary):
c += v.item()
return 1

with set_refprune_flags('per_bb,fanout_raise'):
self.check(func, (types.intp), basicblock=True, diamond=False,
fanout=False, fanout_raise=True)
Expand Down
Loading

0 comments on commit c66a986

Please sign in to comment.