Skip to content

Commit ab83bbb

Browse files
authored
Merge branch 'main' into mlx-add-bitwise-or
2 parents af4d4bf + 37effad commit ab83bbb

67 files changed

Lines changed: 1521 additions & 309 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.lintrunner.toml

Lines changed: 98 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,16 +109,106 @@ is_formatter = true
109109
[[linter]]
110110
code = 'CPPCHECK'
111111
include_patterns = [
112-
'backends/arm/**/*.cpp',
113-
'backends/arm/**/*.h',
114-
'backends/arm/**/*.hpp',
115-
'backends/cortex_m/**/*.cpp',
116-
'backends/cortex_m/**/*.h',
117-
'examples/arm/**/*.cpp',
118-
'examples/arm/**/*.h',
119-
'examples/arm/**/*.hpp',
112+
'**/*.cpp',
113+
'**/*.h',
114+
'**/*.hpp',
120115
]
121116
exclude_patterns = [
117+
# Third-party and vendored code.
118+
'third-party/**',
119+
'third_party/**',
120+
'**/third-party/**',
121+
'**/third_party/**',
122+
123+
# Mirrored sources under src/ (Python package layout). Prefer linting canonical paths.
124+
'src/executorch/**',
125+
# PyTorch compatibility code kept in sync with upstream.
126+
'runtime/core/portable_type/c10/**',
127+
128+
# Generated sources, templates, and codegen tooling to onboard separately.
129+
'codegen/templates/**',
130+
'codegen/tools/selective_build.cpp',
131+
'exir/_serialize/**',
132+
133+
# Backend-owned code to onboard separately.
134+
'backends/aoti/**',
135+
'backends/apple/**',
136+
'backends/cadence/**',
137+
'backends/cuda/**',
138+
'backends/mediatek/**',
139+
'backends/mlx/**',
140+
'backends/nxp/**',
141+
'backends/openvino/**',
142+
'backends/qualcomm/**',
143+
'backends/samsung/**',
144+
'backends/test/**',
145+
'backends/vulkan/**',
146+
'backends/webgpu/**',
147+
'backends/xnnpack/**',
148+
149+
# Backend-owned examples to onboard with those backends.
150+
'examples/demo-apps/**',
151+
'examples/mediatek/**',
152+
'examples/nxp/**',
153+
'examples/qualcomm/**',
154+
'examples/samsung/**',
155+
156+
# Other examples to onboard separately.
157+
'examples/devtools/**',
158+
'examples/llm_manual/**',
159+
'examples/models/**',
160+
'examples/portable/**',
161+
'examples/raspberry_pi/**',
162+
163+
# EXIR and devtools areas to onboard separately.
164+
'devtools/bundled_program/**',
165+
'devtools/etdump/**',
166+
'exir/backend/test/**',
167+
'exir/tests/**',
168+
'exir/verification/**',
169+
170+
# Extension areas to onboard incrementally.
171+
'extension/android/**',
172+
'extension/apple/**',
173+
'extension/asr/runner/transducer_runner.h',
174+
'extension/aten_util/**',
175+
'extension/benchmark/apple/**',
176+
'extension/data_loader/**',
177+
'extension/evalue_util/**',
178+
'extension/flat_tensor/**',
179+
'extension/kernel_util/make_boxed_from_unboxed_functor.h',
180+
'extension/kernel_util/test/**',
181+
'extension/llm/**',
182+
'extension/memory_allocator/**',
183+
'extension/module/**',
184+
'extension/named_data_map/**',
185+
'extension/pybindings/**',
186+
'extension/pytree/**',
187+
'extension/runner_util/**',
188+
'extension/tensor/**',
189+
'extension/testing_util/**',
190+
'extension/threadpool/**',
191+
'extension/training/**',
192+
'extension/wasm/**',
193+
194+
# Kernel areas to onboard separately.
195+
'kernels/aten/**',
196+
'kernels/optimized/**',
197+
'kernels/portable/**',
198+
'kernels/prim_ops/**',
199+
'kernels/quantized/**',
200+
'kernels/test/**',
201+
202+
# Runtime areas to onboard incrementally.
203+
'runtime/backend/**',
204+
'runtime/core/**',
205+
'runtime/executor/**',
206+
'runtime/kernel/**',
207+
'runtime/platform/**',
208+
209+
# Top-level test and platform integration areas.
210+
'test/**',
211+
'zephyr/**',
122212
]
123213
command = [
124214
'python',

backends/apple/coreml/CMakeLists.txt

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,12 +230,6 @@ if(APPLE)
230230

231231
executorch_target_link_options_shared_lib(coremldelegate)
232232

233-
if(EXECUTORCH_COREML_BUILD_EXECUTOR_RUNNER)
234-
target_link_libraries(
235-
coremldelegate PRIVATE portable_ops_lib portable_kernels
236-
)
237-
endif()
238-
239233
target_compile_options(
240234
coremldelegate PRIVATE -fobjc-arc -fno-exceptions -x objective-c++
241235
-Wno-null-character -Wno-receiver-expr

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from .decompose_cumsum_pass import DecomposeCumsumPass # noqa
4343
from .decompose_div_pass import DecomposeDivPass # noqa
4444
from .decompose_div_tensor_mode import DecomposeDivTensorModePass # noqa
45+
from .decompose_dynamic_full_pass import DecomposeDynamicFullPass # noqa
4546
from .decompose_einsum_pass import DecomposeEinsumPass # noqa
4647
from .decompose_elu_pass import ConvertEluFamilyToEluPass, DecomposeEluPass # noqa
4748
from .decompose_embedding_pass import DecomposeEmbeddingPass # noqa # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
DecomposeCumsumPass,
5050
DecomposeDivPass,
5151
DecomposeDivTensorModePass,
52+
DecomposeDynamicFullPass,
5253
DecomposeEinsumPass,
5354
DecomposeEluPass,
5455
DecomposeEmbeddingPass,
@@ -150,10 +151,7 @@
150151
)
151152
from executorch.backends.arm._passes.arm_pass import ArmPass
152153
from executorch.backends.arm.common.arm_compile_spec import ArmCompileSpec
153-
from executorch.backends.arm.common.pipeline_config import (
154-
ArmPassPipelineConfig,
155-
SoftmaxDecompositionConfig,
156-
)
154+
from executorch.backends.arm.common.pipeline_config import SoftmaxDecompositionConfig
157155
from executorch.backends.arm.tosa.specification import (
158156
tosa_spec_in_set,
159157
TosaLoweringContext,
@@ -221,16 +219,13 @@ def __init__(self, compile_spec: ArmCompileSpec) -> None:
221219
super().__init__()
222220
self.configure_skip_passes()
223221

224-
def configure_skip_passes(
225-
self,
226-
override_config: ArmPassPipelineConfig | None = None,
227-
) -> tuple[type, ...]:
222+
def configure_skip_passes(self) -> tuple[type, ...]:
228223
"""Configures the pass manager to skip certain passes based on the
229224
ArmPassPipelineConfig class found in the compile spec.
230225
"""
231226
skip_set: set[type] = set()
232227

233-
config = override_config or self.compile_spec._get_pass_pipeline_config()
228+
config = self.compile_spec._get_pass_pipeline_config()
234229
logger.debug(f"Skip Config: {config}")
235230

236231
match config.softmax:
@@ -502,6 +497,7 @@ def _tosa_pipeline(
502497
ConvertMinMaxPass(),
503498
DecomposeAnyPass(),
504499
DecorateFp32toInt32CastingPass(),
500+
DecomposeDynamicFullPass(),
505501
ConvertExpandCopyToRepeatPass(),
506502
UnsqueezeBeforeRepeatPass(),
507503
DecomposeCumsumPass(exported_program),
@@ -588,6 +584,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
588584
DecomposeIndexCopyPass(tfa_pass=True),
589585
DecomposeSelectScatterPass(tfa_pass=True),
590586
DecomposeSliceScatterPass(tfa_pass=True),
587+
DecomposeDynamicFullPass(tfa_pass=True),
591588
ConvertInt64ConstOpsToInt32Pass(tfa_pass=True),
592589
ConvertInt64OutputOpsToInt32Pass(tfa_pass=True),
593590
InsertInt32CastsAfterInt64PlaceholdersPass(tfa_pass=True),
@@ -649,9 +646,14 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
649646
)
650647

651648
# Postprocessing passes
649+
quant_inf_cfg = self.compile_spec._get_pass_pipeline_config().quantize_inf
652650
self.add_passes(
653651
[
654-
ReplaceInfAndLimitValuesPass(tfa_pass=True),
652+
ReplaceInfAndLimitValuesPass(
653+
quant_inf_cfg.neg_inf,
654+
quant_inf_cfg.pos_inf,
655+
tfa_pass=True,
656+
),
655657
DecomposeMaskedFillPass(tfa_pass=True),
656658
DeduplicateGetAttrPass(tfa_pass=True),
657659
]
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from typing import Any, Set, Type
7+
8+
import torch
9+
from executorch.backends.arm._passes.arm_pass import ArmPass
10+
from executorch.backends.arm._passes.unsqueeze_before_repeat_pass import (
11+
UnsqueezeBeforeRepeatPass,
12+
)
13+
from executorch.exir.dialects._ops import ops as exir_ops
14+
from executorch.exir.pass_base import ExportPass
15+
16+
17+
class DecomposeDynamicFullPass(ArmPass):
    """Rewrite dynamic-shape `full` into scalar `full` plus `repeat`.

    A `full` call whose size argument is a list/tuple containing at least one
    non-`int` entry is replaced by a `full` of shape ``(1,)`` followed by a
    `repeat` with the original size. Calls whose size entries are all plain
    ints are left untouched.

    The `repeat` that is emitted comes from the same dialect (ATen or Edge)
    as the `full` being rewritten, so the pass does not introduce Edge ops
    into an ATen-dialect graph when it handles `torch.ops.aten.full.default`.
    """

    _passes_required_after: Set[Type[ExportPass]] = {UnsqueezeBeforeRepeatPass}

    # `full` overloads this pass rewrites.
    full_targets = {
        torch.ops.aten.full.default,
        exir_ops.edge.aten.full.default,
    }
    # Fallback `repeat` op, used for any target missing from the table below
    # (e.g. a subclass that extends `full_targets`).
    repeat = exir_ops.edge.aten.repeat.default

    # Dialect-matched `repeat` op for each handled `full` overload.
    _repeat_for_full = {
        torch.ops.aten.full.default: torch.ops.aten.repeat.default,
        exir_ops.edge.aten.full.default: exir_ops.edge.aten.repeat.default,
    }

    @staticmethod
    def _has_symbolic_extent(size: Any) -> bool:
        """Return True if `size` is a list/tuple with a non-`int` entry.

        Anything that is not a list or tuple, and any list/tuple made up
        solely of ints (including an empty one), yields False.
        """
        return isinstance(size, (list, tuple)) and any(
            not isinstance(dim, int) for dim in size
        )

    def call_operator(self, op, args, kwargs, meta, updated=False):
        """Decompose a dynamic-shape `full`; forward every other call as is.

        Args:
            op: Operator being visited.
            args: Positional arguments of the call; for `full` the first two
                are read as ``(size, fill_value)``.
            kwargs: Keyword arguments of the call, forwarded to the scalar
                `full`.
            meta: Node metadata, reused for both emitted nodes.
            updated: Forwarded unchanged to the parent class for calls that
                are not rewritten.

        Returns:
            Whatever the parent `call_operator` returns: either the original
            call, or the `repeat` node that replaces it.
        """
        if op not in self.full_targets:
            return super().call_operator(op, args, kwargs, meta, updated)

        size, fill_value = args[:2]
        if not self._has_symbolic_extent(size):
            return super().call_operator(op, args, kwargs, meta, updated)

        # Stay in the dialect of the op being replaced.
        repeat_op = self._repeat_for_full.get(op, self.repeat)

        # Static one-element tensor carrying the fill value (and any
        # dtype/device kwargs of the original call).
        scalar_full = super().call_operator(
            op=op,
            args=((1,), fill_value),
            kwargs=kwargs,
            meta=meta,
            updated=True,
        )
        # The dynamic extents move onto `repeat`.
        return super().call_operator(
            op=repeat_op,
            args=(scalar_full, size),
            kwargs={},
            meta=meta,
            updated=True,
        )

backends/arm/_passes/insert_const_shapes.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@ def _is_shape_arg(arg: Any) -> bool:
4040
and all(isinstance(x, int) for x in arg)
4141
)
4242

43+
def call(self, graph_module):
    """Run the parent `call` with an empty const-shape cache.

    The cache is emptied before the parent implementation runs and emptied
    again afterwards, including when the parent raises, so no cached entry
    is carried from one invocation to the next.
    """
    self._const_shape_cache.clear()
    try:
        result = super().call(graph_module)
    finally:
        # Always drop entries created during this run.
        self._const_shape_cache.clear()
    return result
49+
4350
def call_operator(self, op, args, kwargs, meta, updated: Optional[bool] = False):
4451
if op not in self.target_ops:
4552
return super().call_operator(op, args, kwargs, meta, updated)

backends/arm/_passes/replace_inf_and_limit_values_pass.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,22 @@
1616

1717
class ReplaceInfAndLimitValuesPass(ArmPass):
1818
"""Rewrites +inf/-inf and floating-point limit values (e.g.,
19-
torch.finfo(...).min/max) to quantization-friendly values (±255 by default),
19+
torch.finfo(...).min/max) to configured quantization-friendly values,
2020
improving quantizer stability (notably for attention mask paths).
2121
"""
2222

2323
_passes_required_after: Set[Type[ExportPass]] = set()
2424

25+
def __init__(
    self,
    neg_inf: float,
    pos_inf: float,
    tfa_pass: bool = False,
) -> None:
    """Store the replacement values used by this pass.

    Args:
        neg_inf: Value stored on the instance as `self.neg_inf`.
        pos_inf: Value stored on the instance as `self.pos_inf`.
        tfa_pass: Forwarded to the parent constructor.
    """
    super().__init__(tfa_pass=tfa_pass)
    self.neg_inf, self.pos_inf = neg_inf, pos_inf
34+
2535
def _allowed_to_transform_named_buffer(self, buf_name, graph_module) -> bool:
2636
attr_nodes = [
2737
node
@@ -51,19 +61,19 @@ def call(self, graph_module: torch.fx.GraphModule):
5161
continue
5262

5363
modified = True
54-
# 255 here is mainly for attention_mask in Llama for reasonable quant scale
55-
t = torch.nan_to_num(tensor, posinf=255, neginf=-255)
64+
65+
t = torch.nan_to_num(tensor, posinf=self.pos_inf, neginf=self.neg_inf)
5666
setattr(graph_module, buf_name, t)
5767

5868
for node in graph_module.graph.nodes:
5969
arg_list = list(node.args)
6070
for index, arg in enumerate(arg_list):
6171
if arg == float("-inf") or arg == torch.finfo(torch.float32).min:
6272
modified = True
63-
arg_list[index] = -255.0
73+
arg_list[index] = self.neg_inf
6474
elif arg == float("inf") or arg == torch.finfo(torch.float32).max:
6575
modified = True
66-
arg_list[index] = +255.0
76+
arg_list[index] = self.pos_inf
6777
node.args = tuple(arg_list)
6878

6979
if modified:

0 commit comments

Comments
 (0)