From 5889e76847a852fc00a5f3f5f4935e9074983400 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Wed, 1 Apr 2026 13:32:05 -0700 Subject: [PATCH] Fix SLEEF preprocessor macro name to match ATen vec headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ATen NEON vectorized math headers (vec128_float_neon.h) check for AT_BUILD_ARM_VEC256_WITH_SLEEF to enable SLEEF intrinsics for exp(), log(), etc. ExecuTorch's get_vec_preprocessor_flags() was defining ET_BUILD_ARM_VEC256_WITH_SLEEF (wrong prefix), so the USE_SLEEF macro always took the fallback path: map(std::exp) — scalar exp called per-element with full vector load/store overhead wrapping it. With this fix, Vectorized::exp() correctly dispatches to Sleef_expf4_u10 on ARM, which is the intended behavior. Differential Revision: [D96044314](https://our.internmc.facebook.com/intern/diff/D96044314/) [ghstack-poisoned] --- kernels/optimized/lib_defs.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernels/optimized/lib_defs.bzl b/kernels/optimized/lib_defs.bzl index 928fc44635d..1e4684521e1 100644 --- a/kernels/optimized/lib_defs.bzl +++ b/kernels/optimized/lib_defs.bzl @@ -25,16 +25,16 @@ def get_vec_preprocessor_flags(): # various ovr_configs are not available in oss preprocessor_flags = select({ "ovr_config//os:linux-x86_64": [ - "-DET_BUILD_ARM_VEC256_WITH_SLEEF", + "-DAT_BUILD_ARM_VEC256_WITH_SLEEF", ] if not runtime.is_oss else [], "ovr_config//os:iphoneos-arm64": [ - "-DET_BUILD_ARM_VEC256_WITH_SLEEF", + "-DAT_BUILD_ARM_VEC256_WITH_SLEEF", ] if not runtime.is_oss else [], "ovr_config//os:macos-arm64": [ - "-DET_BUILD_ARM_VEC256_WITH_SLEEF", + "-DAT_BUILD_ARM_VEC256_WITH_SLEEF", ] if not runtime.is_oss else [], "ovr_config//os:android-arm64": [ - "-DET_BUILD_ARM_VEC256_WITH_SLEEF", + "-DAT_BUILD_ARM_VEC256_WITH_SLEEF", ] if not runtime.is_oss else [], "DEFAULT": [], })