Skip to content

Commit 8e3af72

Browse files
authored
Extend SME2.1 intrinsics to mf8 (#375)
SME2.1 intrinsics were developed in parallel with FP8 and thus lacked support for the svmfloat8_t type. This patch adds support for consistency.
1 parent 0cecab2 commit 8e3af72

File tree

1 file changed

+64
-44
lines changed

1 file changed

+64
-44
lines changed

main/acle.md

+64-44
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ toc: true
1111
---
1212

1313
<!--
14-
SPDX-FileCopyrightText: Copyright 2011-2024 Arm Limited and/or its affiliates <[email protected]>
14+
SPDX-FileCopyrightText: Copyright 2011-2025 Arm Limited and/or its affiliates <[email protected]>
1515
SPDX-FileCopyrightText: Copyright 2022 Google LLC.
1616
CC-BY-SA-4.0 AND Apache-Patent-License
1717
See LICENSE.md file for details
@@ -438,9 +438,9 @@ Armv8.4-A [[ARMARMv84]](#ARMARMv84). Support is added for the Dot Product intrin
438438
* Refined function versioning scope and signature rules to use the default
439439
version scope and signature.
440440
* Added `_n` forms of the SVE2p1 and SME2 `svdot` intrinsics.
441-
* Changed the status of the SME2p1 ACLE from Alpha to Beta.
442-
* Changed the status of the SVE2p1 ACLE from Alpha to Beta.
443-
441+
* Changed the status of SME2p1 from Alpha to Beta.
442+
* Changed the status of SVE2p1 from Alpha to Beta.
443+
* Added mf8 variants of the SME2p1 intrinsics.
444444

445445
### References
446446

@@ -12160,85 +12160,97 @@ Lookup table read with 2-bit and 4-bit indexes
1216012160
Move multi-vectors to/from ZA
1216112161

1216212162
``` c
12163-
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12164-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12163+
// Variants are also available for _za8_u8, _za8_mf8,
12164+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12165+
// _za32_s32, _za32_u32, _za32_f32,
1216512166
// _za64_s64, _za64_u64 and _za64_f64
1216612167
svint8x2_t svread_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
1216712168
__arm_streaming __arm_in("za");
1216812169

1216912170

12170-
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12171-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12171+
// Variants are also available for _za8_u8, _za8_mf8,
12172+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12173+
// _za32_s32, _za32_u32, _za32_f32,
1217212174
// _za64_s64, _za64_u64 and _za64_f64
1217312175
svint8x4_t svread_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
1217412176
__arm_streaming __arm_in("za");
1217512177

1217612178

12177-
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12178-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12179+
// Variants are also available for _za8_u8, _za8_mf8,
12180+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12181+
// _za32_s32, _za32_u32, _za32_f32,
1217912182
// _za64_s64, _za64_u64 and _za64_f64
1218012183
svint8x2_t svread_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
1218112184
__arm_streaming __arm_in("za");
1218212185

1218312186

12184-
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12185-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12187+
// Variants are also available for _za8_u8, _za8_mf8,
12188+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12189+
// _za32_s32, _za32_u32, _za32_f32,
1218612190
// _za64_s64, _za64_u64 and _za64_f64
1218712191
svint8x4_t svread_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
1218812192
__arm_streaming __arm_in("za");
1218912193

1219012194

12191-
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12192-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12195+
// Variants are also available for _za8_u8, _za8_mf8,
12196+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12197+
// _za32_s32, _za32_u32, _za32_f32,
1219312198
// _za64_s64, _za64_u64 and _za64_f64
1219412199
svint8x2_t svread_za8_s8_vg1x2(uint32_t slice)
1219512200
__arm_streaming __arm_in("za");
1219612201

1219712202

12198-
// Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12199-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12203+
// Variants are also available for _za8_u8, _za8_mf8,
12204+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12205+
// _za32_s32, _za32_u32, _za32_f32,
1220012206
// _za64_s64, _za64_u64 and _za64_f64
1220112207
svint8x4_t svread_za8_s8_vg1x4(uint32_t slice)
1220212208
__arm_streaming __arm_in("za");
1220312209

1220412210

12205-
// Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12206-
// _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12211+
// Variants are also available for _za8[_u8], _za8[_mf8],
12212+
// _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12213+
// _za32[_s32], _za32[_u32], _za32[_f32],
1220712214
// _za64[_s64], _za64[_u64] and _za64[_f64]
1220812215
void svwrite_hor_za8[_s8]_vg2(uint64_t tile, uint32_t slice, svint8x2_t zn)
1220912216
__arm_streaming __arm_inout("za");
1221012217

1221112218

12212-
// Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12213-
// _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12219+
// Variants are also available for _za8[_u8], _za8[_mf8],
12220+
// _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12221+
// _za32[_s32], _za32[_u32], _za32[_f32],
1221412222
// _za64[_s64], _za64[_u64] and _za64[_f64]
1221512223
void svwrite_hor_za8[_s8]_vg4(uint64_t tile, uint32_t slice, svint8x4_t zn)
1221612224
__arm_streaming __arm_inout("za");
1221712225

1221812226

12219-
// Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12220-
// _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12227+
// Variants are also available for _za8[_u8], _za8[_mf8],
12228+
// _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12229+
// _za32[_s32], _za32[_u32], _za32[_f32],
1222112230
// _za64[_s64], _za64[_u64] and _za64[_f64]
1222212231
void svwrite_ver_za8[_s8]_vg2(uint64_t tile, uint32_t slice, svint8x2_t zn)
1222312232
__arm_streaming __arm_inout("za");
1222412233

1222512234

12226-
// Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12227-
// _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12235+
// Variants are also available for _za8[_u8], _za8[_mf8],
12236+
// _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12237+
// _za32[_s32], _za32[_u32], _za32[_f32],
1222812238
// _za64[_s64], _za64[_u64] and _za64[_f64]
1222912239
void svwrite_ver_za8[_s8]_vg4(uint64_t tile, uint32_t slice, svint8x4_t zn)
1223012240
__arm_streaming __arm_inout("za");
1223112241

1223212242

12233-
// Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12234-
// _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12243+
// Variants are also available for _za8[_u8], _za8[_mf8],
12244+
// _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12245+
// _za32[_s32], _za32[_u32], _za32[_f32],
1223512246
// _za64[_s64], _za64[_u64] and _za64[_f64]
1223612247
void svwrite_za8[_s8]_vg1x2(uint32_t slice, svint8x2_t zn)
1223712248
__arm_streaming __arm_inout("za");
1223812249

1223912250

12240-
// Variants are also available for _za8[_u8], za8[_mf8], _za16[_s16], _za16[_u16],
12241-
// _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12251+
// Variants are also available for _za8[_u8], _za8[_mf8],
12252+
// _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12253+
// _za32[_s32], _za32[_u32], _za32[_f32],
1224212254
// _za64[_s64], _za64[_u64] and _za64[_f64]
1224312255
void svwrite_za8[_s8]_vg1x4(uint32_t slice, svint8x4_t zn)
1224412256
__arm_streaming __arm_inout("za");
@@ -12513,7 +12525,7 @@ The intrinsics in this section are defined by the header file
1251312525
Move and zero ZA tile slice to vector register.
1251412526

1251512527
```
12516-
// And similarly for u8.
12528+
// And similarly for u8 and mf8.
1251712529
svint8_t svreadz_hor_za8_s8(uint64_t tile, uint32_t slice)
1251812530
__arm_streaming __arm_inout("za");
1251912531

@@ -12529,11 +12541,12 @@ Move and zero ZA tile slice to vector register.
1252912541
svint64_t svreadz_hor_za64_s64(uint64_t tile, uint32_t slice)
1253012542
__arm_streaming __arm_inout("za");
1253112543

12532-
// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
12544+
// And similarly for s16, s32, s64, u8, u16, u32, u64,
12545+
// mf8, bf16, f16, f32, f64
1253312546
svint8_t svreadz_hor_za128_s8(uint64_t tile, uint32_t slice)
1253412547
__arm_streaming __arm_inout("za");
1253512548

12536-
// And similarly for u8.
12549+
// And similarly for u8 and mf8.
1253712550
svint8_t svreadz_ver_za8_s8(uint64_t tile, uint32_t slice)
1253812551
__arm_streaming __arm_inout("za");
1253912552

@@ -12549,7 +12562,8 @@ Move and zero ZA tile slice to vector register.
1254912562
svint64_t svreadz_ver_za64_s64(uint64_t tile, uint32_t slice)
1255012563
__arm_streaming __arm_inout("za");
1255112564

12552-
// And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
12565+
// And similarly for s16, s32, s64, u8, u16, u32, u64,
12566+
// mf8, bf16, f16, f32, f64
1255312567
svint8_t svreadz_ver_za128_s8(uint64_t tile, uint32_t slice)
1255412568
__arm_streaming __arm_inout("za");
1255512569
```
@@ -12559,29 +12573,33 @@ Move and zero ZA tile slice to vector register.
1255912573
Move and zero multiple ZA tile slices to vector registers
1256012574

1256112575
``` c
12562-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12563-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12576+
// Variants are also available for _za8_u8, _za8_mf8,
12577+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12578+
// _za32_s32, _za32_u32, _za32_f32,
1256412579
// _za64_s64, _za64_u64 and _za64_f64
1256512580
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
1256612581
__arm_streaming __arm_inout("za");
1256712582

1256812583

12569-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12570-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12584+
// Variants are also available for _za8_u8, _za8_mf8,
12585+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12586+
// _za32_s32, _za32_u32, _za32_f32,
1257112587
// _za64_s64, _za64_u64 and _za64_f64
1257212588
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
1257312589
__arm_streaming __arm_inout("za");
1257412590

1257512591

12576-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12577-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12592+
// Variants are also available for _za8_u8, _za8_mf8,
12593+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12594+
// _za32_s32, _za32_u32, _za32_f32,
1257812595
// _za64_s64, _za64_u64 and _za64_f64
1257912596
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
1258012597
__arm_streaming __arm_inout("za");
1258112598

1258212599

12583-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12584-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12600+
// Variants are also available for _za8_u8, _za8_mf8,
12601+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12602+
// _za32_s32, _za32_u32, _za32_f32,
1258512603
// _za64_s64, _za64_u64 and _za64_f64
1258612604
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
1258712605
__arm_streaming __arm_inout("za");
@@ -12592,15 +12610,17 @@ Move and zero multiple ZA tile slices to vector registers
1259212610
Move and zero multiple ZA single-vector groups to vector registers
1259312611

1259412612
```
12595-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12596-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12613+
// Variants are also available for _za8_u8, _za8_mf8,
12614+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12615+
// _za32_s32, _za32_u32, _za32_f32,
1259712616
// _za64_s64, _za64_u64 and _za64_f64
1259812617
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice)
1259912618
__arm_streaming __arm_inout("za");
1260012619

1260112620

12602-
// Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12603-
// _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12621+
// Variants are also available for _za8_u8, _za8_mf8,
12622+
// _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12623+
// _za32_s32, _za32_u32, _za32_f32,
1260412624
// _za64_s64, _za64_u64 and _za64_f64
1260512625
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice)
1260612626
__arm_streaming __arm_inout("za");

0 commit comments

Comments
 (0)