@@ -11,7 +11,7 @@ toc: true
11
11
---
12
12
13
13
<!--
14
- SPDX-FileCopyrightText: Copyright 2011-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
14
+ SPDX-FileCopyrightText: Copyright 2011-2025 Arm Limited and/or its affiliates <open-source-office@arm.com>
15
15
SPDX-FileCopyrightText: Copyright 2022 Google LLC.
16
16
CC-BY-SA-4.0 AND Apache-Patent-License
17
17
See LICENSE.md file for details
@@ -438,9 +438,9 @@ Armv8.4-A [[ARMARMv84]](#ARMARMv84). Support is added for the Dot Product intrin
438
438
* Refined function versioning scope and signature rules to use the default
439
439
version scope and signature.
440
440
* Added `_n` forms of the SVE2p1 and SME2 `svdot` intrinsics.
441
- * Changed the status of the SME2p1 ACLE from Alpha to Beta.
442
- * Changed the status of the SVE2p1 ACLE from Alpha to Beta.
443
-
441
+ * Changed the status of the SME2p1 ACLE from Alpha to Beta.
442
+ * Changed the status of the SVE2p1 ACLE from Alpha to Beta.
443
+ * Added mf8 variants of the SME2p1 intrinsics.
444
444
445
445
### References
446
446
@@ -12160,85 +12160,97 @@ Lookup table read with 2-bit and 4-bit indexes
12160
12160
Move multi-vectors to/from ZA
12161
12161
12162
12162
``` c
12163
- // Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12164
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12163
+ // Variants are also available for _za8_u8, _za8_mf8,
12164
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12165
+ // _za32_s32, _za32_u32, _za32_f32,
12165
12166
// _za64_s64, _za64_u64 and _za64_f64
12166
12167
svint8x2_t svread_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
12167
12168
__arm_streaming __arm_in("za");
12168
12169
12169
12170
12170
- // Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12171
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12171
+ // Variants are also available for _za8_u8, _za8_mf8,
12172
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12173
+ // _za32_s32, _za32_u32, _za32_f32,
12172
12174
// _za64_s64, _za64_u64 and _za64_f64
12173
12175
svint8x4_t svread_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
12174
12176
__arm_streaming __arm_in("za");
12175
12177
12176
12178
12177
- // Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12178
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12179
+ // Variants are also available for _za8_u8, _za8_mf8,
12180
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12181
+ // _za32_s32, _za32_u32, _za32_f32,
12179
12182
// _za64_s64, _za64_u64 and _za64_f64
12180
12183
svint8x2_t svread_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
12181
12184
__arm_streaming __arm_in("za");
12182
12185
12183
12186
12184
- // Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12185
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12187
+ // Variants are also available for _za8_u8, _za8_mf8,
12188
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12189
+ // _za32_s32, _za32_u32, _za32_f32,
12186
12190
// _za64_s64, _za64_u64 and _za64_f64
12187
12191
svint8x4_t svread_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
12188
12192
__arm_streaming __arm_in("za");
12189
12193
12190
12194
12191
- // Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12192
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12195
+ // Variants are also available for _za8_u8, _za8_mf8,
12196
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12197
+ // _za32_s32, _za32_u32, _za32_f32,
12193
12198
// _za64_s64, _za64_u64 and _za64_f64
12194
12199
svint8x2_t svread_za8_s8_vg1x2(uint32_t slice)
12195
12200
__arm_streaming __arm_in("za");
12196
12201
12197
12202
12198
- // Variants are also available for _za8_u8, _za8_mf8, _za16_s16, _za16_u16,
12199
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12203
+ // Variants are also available for _za8_u8, _za8_mf8,
12204
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12205
+ // _za32_s32, _za32_u32, _za32_f32,
12200
12206
// _za64_s64, _za64_u64 and _za64_f64
12201
12207
svint8x4_t svread_za8_s8_vg1x4(uint32_t slice)
12202
12208
__arm_streaming __arm_in("za");
12203
12209
12204
12210
12205
- // Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12206
- // _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12211
+ // Variants are also available for _za8[_u8], _za8[_mf8],
12212
+ // _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12213
+ // _za32[_s32], _za32[_u32], _za32[_f32],
12207
12214
// _za64[_s64], _za64[_u64] and _za64[_f64]
12208
12215
void svwrite_hor_za8[_s8]_vg2(uint64_t tile, uint32_t slice, svint8x2_t zn)
12209
12216
__arm_streaming __arm_inout("za");
12210
12217
12211
12218
12212
- // Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12213
- // _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12219
+ // Variants are also available for _za8[_u8], _za8[_mf8],
12220
+ // _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12221
+ // _za32[_s32], _za32[_u32], _za32[_f32],
12214
12222
// _za64[_s64], _za64[_u64] and _za64[_f64]
12215
12223
void svwrite_hor_za8[_s8]_vg4(uint64_t tile, uint32_t slice, svint8x4_t zn)
12216
12224
__arm_streaming __arm_inout("za");
12217
12225
12218
12226
12219
- // Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12220
- // _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12227
+ // Variants are also available for _za8[_u8], _za8[_mf8],
12228
+ // _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12229
+ // _za32[_s32], _za32[_u32], _za32[_f32],
12221
12230
// _za64[_s64], _za64[_u64] and _za64[_f64]
12222
12231
void svwrite_ver_za8[_s8]_vg2(uint64_t tile, uint32_t slice, svint8x2_t zn)
12223
12232
__arm_streaming __arm_inout("za");
12224
12233
12225
12234
12226
- // Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12227
- // _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12235
+ // Variants are also available for _za8[_u8], _za8[_mf8],
12236
+ // _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12237
+ // _za32[_s32], _za32[_u32], _za32[_f32],
12228
12238
// _za64[_s64], _za64[_u64] and _za64[_f64]
12229
12239
void svwrite_ver_za8[_s8]_vg4(uint64_t tile, uint32_t slice, svint8x4_t zn)
12230
12240
__arm_streaming __arm_inout("za");
12231
12241
12232
12242
12233
- // Variants are also available for _za8[_u8], _za8[_mf8], _za16[_s16], _za16[_u16],
12234
- // _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12243
+ // Variants are also available for _za8[_u8], _za8[_mf8],
12244
+ // _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12245
+ // _za32[_s32], _za32[_u32], _za32[_f32],
12235
12246
// _za64[_s64], _za64[_u64] and _za64[_f64]
12236
12247
void svwrite_za8[_s8]_vg1x2(uint32_t slice, svint8x2_t zn)
12237
12248
__arm_streaming __arm_inout("za");
12238
12249
12239
12250
12240
- // Variants are also available for _za8[_u8], za8[_mf8], _za16[_s16], _za16[_u16],
12241
- // _za16[_f16], _za16[_bf16], _za32[_s32], _za32[_u32], _za32[_f32],
12251
+ // Variants are also available for _za8[_u8], _za8[_mf8],
12252
+ // _za16[_s16], _za16[_u16], _za16[_f16], _za16[_bf16],
12253
+ // _za32[_s32], _za32[_u32], _za32[_f32],
12242
12254
// _za64[_s64], _za64[_u64] and _za64[_f64]
12243
12255
void svwrite_za8[_s8]_vg1x4(uint32_t slice, svint8x4_t zn)
12244
12256
__arm_streaming __arm_inout("za");
@@ -12513,7 +12525,7 @@ The intrinsics in this section are defined by the header file
12513
12525
Move and zero ZA tile slice to vector register.
12514
12526
12515
12527
``` c
12516
- // And similarly for u8.
12528
+ // And similarly for u8 and mf8.
12517
12529
svint8_t svreadz_hor_za8_s8(uint64_t tile, uint32_t slice)
12518
12530
__arm_streaming __arm_inout("za");
12519
12531
@@ -12529,11 +12541,12 @@ Move and zero ZA tile slice to vector register.
12529
12541
svint64_t svreadz_hor_za64_s64(uint64_t tile, uint32_t slice)
12530
12542
__arm_streaming __arm_inout("za");
12531
12543
12532
- // And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
12544
+ // And similarly for s16, s32, s64, u8, u16, u32, u64,
12545
+ // mf8, bf16, f16, f32, f64
12533
12546
svint8_t svreadz_hor_za128_s8(uint64_t tile, uint32_t slice)
12534
12547
__arm_streaming __arm_inout("za");
12535
12548
12536
- // And similarly for u8.
12549
+ // And similarly for u8 and mf8.
12537
12550
svint8_t svreadz_ver_za8_s8(uint64_t tile, uint32_t slice)
12538
12551
__arm_streaming __arm_inout("za");
12539
12552
@@ -12549,7 +12562,8 @@ Move and zero ZA tile slice to vector register.
12549
12562
svint64_t svreadz_ver_za64_s64(uint64_t tile, uint32_t slice)
12550
12563
__arm_streaming __arm_inout("za");
12551
12564
12552
- // And similarly for s16, s32, s64, u8, u16, u32, u64, bf16, f16, f32, f64
12565
+ // And similarly for s16, s32, s64, u8, u16, u32, u64,
12566
+ // mf8, bf16, f16, f32, f64
12553
12567
svint8_t svreadz_ver_za128_s8(uint64_t tile, uint32_t slice)
12554
12568
__arm_streaming __arm_inout("za");
12555
12569
```
@@ -12559,29 +12573,33 @@ Move and zero ZA tile slice to vector register.
12559
12573
Move and zero multiple ZA tile slices to vector registers
12560
12574
12561
12575
``` c
12562
- // Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12563
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12576
+ // Variants are also available for _za8_u8, _za8_mf8,
12577
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12578
+ // _za32_s32, _za32_u32, _za32_f32,
12564
12579
// _za64_s64, _za64_u64 and _za64_f64
12565
12580
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t tile, uint32_t slice)
12566
12581
__arm_streaming __arm_inout("za");
12567
12582
12568
12583
12569
- // Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12570
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12584
+ // Variants are also available for _za8_u8, _za8_mf8,
12585
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12586
+ // _za32_s32, _za32_u32, _za32_f32,
12571
12587
// _za64_s64, _za64_u64 and _za64_f64
12572
12588
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t tile, uint32_t slice)
12573
12589
__arm_streaming __arm_inout("za");
12574
12590
12575
12591
12576
- // Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12577
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12592
+ // Variants are also available for _za8_u8, _za8_mf8,
12593
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12594
+ // _za32_s32, _za32_u32, _za32_f32,
12578
12595
// _za64_s64, _za64_u64 and _za64_f64
12579
12596
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t tile, uint32_t slice)
12580
12597
__arm_streaming __arm_inout("za");
12581
12598
12582
12599
12583
- // Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12584
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12600
+ // Variants are also available for _za8_u8, _za8_mf8,
12601
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12602
+ // _za32_s32, _za32_u32, _za32_f32,
12585
12603
// _za64_s64, _za64_u64 and _za64_f64
12586
12604
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t tile, uint32_t slice)
12587
12605
__arm_streaming __arm_inout("za");
@@ -12592,15 +12610,17 @@ Move and zero multiple ZA tile slices to vector registers
12592
12610
Move and zero multiple ZA single-vector groups to vector registers
12593
12611
12594
12612
``` c
12595
- // Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12596
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12613
+ // Variants are also available for _za8_u8, _za8_mf8,
12614
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12615
+ // _za32_s32, _za32_u32, _za32_f32,
12597
12616
// _za64_s64, _za64_u64 and _za64_f64
12598
12617
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t slice)
12599
12618
__arm_streaming __arm_inout("za");
12600
12619
12601
12620
12602
- // Variants are also available for _za8_u8, _za16_s16, _za16_u16,
12603
- // _za16_f16, _za16_bf16, _za32_s32, _za32_u32, _za32_f32,
12621
+ // Variants are also available for _za8_u8, _za8_mf8,
12622
+ // _za16_s16, _za16_u16, _za16_f16, _za16_bf16,
12623
+ // _za32_s32, _za32_u32, _za32_f32,
12604
12624
// _za64_s64, _za64_u64 and _za64_f64
12605
12625
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t slice)
12606
12626
__arm_streaming __arm_inout("za");
0 commit comments