Skip to content

Commit 822943a

Browse files
committed
Extend SME2.1 intrinsics to mf8
SME2.1 intrinsics were developed in parallel with FP8 and thus lacked support for the svmfloat8_t type. This patch adds the missing mf8 variants so that the SME2.1 intrinsics are consistent with the other element types.
1 parent afd6b56 commit 822943a

File tree

1 file changed

+14
-11
lines changed

1 file changed

+14
-11
lines changed

main/acle.md

+14-11
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ toc: true
1111
---
1212

1313
<!--
14-
SPDX-FileCopyrightText: Copyright 2011-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
14+
SPDX-FileCopyrightText: Copyright 2011-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
1515
SPDX-FileCopyrightText: Copyright 2022 Google LLC.
1616
CC-BY-SA-4.0 AND Apache-Patent-License
1717
See LICENSE.md file for details
@@ -435,6 +435,7 @@ Armv8.4-A [[ARMARMv84]](#ARMARMv84). Support is added for the Dot Product intrin
435435
* Added [`__arm_agnostic`](#arm_agnostic) keyword attribute.
436436
* Refined function versioning scope and signature rules to use the default
437437
version scope and signature.
438+
* Added mf8 variants of SME2.1 intrinsics.
438439

439440
### References
440441

@@ -12509,7 +12510,7 @@ The intrinsics in this section are defined by the header file
1250912510
Move and zero ZA tile slice to vector register.
1251012511

1251112512
``` c
12512-
// And similarly for u8.
12513+
// And similarly for u8 and mf8.
1251312514
svint8_t svreadz_hor_za8_s8(uint64_t tile, uint32_t slice)
1251412515
__arm_streaming __arm_inout("za");
1251512516

@@ -12525,11 +12526,12 @@ Move and zero ZA tile slice to vector register.
1252512526
svint64_t svreadz_hor_za64_s64(uint64_t tile, uint32_t slice)
1252612527
__arm_streaming __arm_inout("za");
1252712528

12528-
// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
12529+
// And similarly for s16, s32, s64, u8, u16, u32, u64,
12530+
// mf8, bf16, f16, f32, f64
1252912531
svint8_t svreadz_hor_za128_s8(uint64_t tile, uint32_t slice)
1253012532
__arm_streaming __arm_inout("za");
1253112533

12532-
// And similarly for u8.
12534+
// And similarly for u8 and mf8.
1253312535
svint8_t svreadz_ver_za8_s8(uint64_t tile, uint32_t slice)
1253412536
__arm_streaming __arm_inout("za");
1253512537

@@ -12545,7 +12547,8 @@ Move and zero ZA tile slice to vector register.
1254512547
svint64_t svreadz_ver_za64_s64(uint64_t tile, uint32_t slice)
1254612548
__arm_streaming __arm_inout("za");
1254712549

12548-
// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
12550+
// And similarly for s16, s32, s64, u8, u16, u32, u64,
12551+
// mf8, bf16, f16, f32, f64
1254912552
svint8_t svreadz_ver_za128_s8(uint64_t tile, uint32_t slice)
1255012553
__arm_streaming __arm_inout("za");
1255112554
```
@@ -12555,28 +12558,28 @@ Move and zero ZA tile slice to vector register.
1255512558
Move and zero multiple ZA tile slices to vector registers.
1255612559

1255712560
``` c
12558-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12561+
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
1255912562
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
1256012563
// _za64_s64, _za64_u64 and _za64_f64
1256112564
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
1256212565
__arm_streaming __arm_inout("za");
1256312566

1256412567

12565-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12568+
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
1256612569
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
1256712570
// _za64_s64, _za64_u64 and _za64_f64
1256812571
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
1256912572
__arm_streaming __arm_inout("za");
1257012573

1257112574

12572-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12575+
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
1257312576
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
1257412577
// _za64_s64, _za64_u64 and _za64_f64
1257512578
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
1257612579
__arm_streaming __arm_inout("za");
1257712580

1257812581

12579-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12582+
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
1258012583
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
1258112584
// _za64_s64, _za64_u64 and _za64_f64
1258212585
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
@@ -12588,14 +12591,14 @@ Move and zero multiple ZA tile slices to vector registers
1258812591
Move and zero multiple ZA single-vector groups to vector registers.
1258912592

1259012593
``` c
12591-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12594+
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
1259212595
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
1259312596
// _za64_s64, _za64_u64 and _za64_f64
1259412597
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice)
1259512598
__arm_streaming __arm_inout("za");
1259612599

1259712600

12598-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12601+
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
1259912602
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
1260012603
// _za64_s64, _za64_u64 and _za64_f64
1260112604
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice)

0 commit comments

Comments
 (0)