Skip to content

Commit a66dcfc

Browse files
JIT ARM64-SVE: Add Sve.LoadVector*ZeroExtendTo*() (#101291)
* JIT ARM64-SVE: Add Sve.LoadVector*ZeroExtendTo*() Add the following APIs: LoadVectorByteZeroExtendToInt16 LoadVectorByteZeroExtendToInt32 LoadVectorByteZeroExtendToInt64 LoadVectorByteZeroExtendToUInt16 LoadVectorByteZeroExtendToUInt32 LoadVectorByteZeroExtendToUInt64 LoadVectorInt16SignExtendToInt32 LoadVectorInt16SignExtendToInt64 LoadVectorInt16SignExtendToUInt32 LoadVectorInt16SignExtendToUInt64 LoadVectorInt32SignExtendToInt64 LoadVectorInt32SignExtendToUInt64 LoadVectorSByteSignExtendToInt16 LoadVectorSByteSignExtendToInt32 LoadVectorSByteSignExtendToInt64 LoadVectorSByteSignExtendToUInt16 LoadVectorSByteSignExtendToUInt32 LoadVectorSByteSignExtendToUInt64 LoadVectorUInt16ZeroExtendToInt32 LoadVectorUInt16ZeroExtendToInt64 LoadVectorUInt16ZeroExtendToUInt32 LoadVectorUInt16ZeroExtendToUInt64 LoadVectorUInt32ZeroExtendToInt64 LoadVectorUInt32ZeroExtendToUInt64 * cleanup: remove unwanted comments Remove comments that mention instructions that the APIs are never mapped to. * fix merge conflict * fix merge conflict * fix spacing * Mark LoadVector*Extend* as having HW_Flag_ExplicitMaskedOperation --------- Co-authored-by: Kunal Pathak <[email protected]>
1 parent 5b4e770 commit a66dcfc

File tree

7 files changed

+535
-1
lines changed

7 files changed

+535
-1
lines changed

Diff for: src/coreclr/jit/emitarm64sve.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -4349,8 +4349,11 @@ void emitter::emitInsSve_R_R_R(instruction ins,
43494349
break;
43504350

43514351
case INS_sve_ld1b:
4352+
case INS_sve_ld1sb:
43524353
case INS_sve_ld1h:
4354+
case INS_sve_ld1sh:
43534355
case INS_sve_ld1w:
4356+
case INS_sve_ld1sw:
43544357
case INS_sve_ld1d:
43554358
return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);
43564359

Diff for: src/coreclr/jit/gentree.cpp

+24
Original file line numberDiff line numberDiff line change
@@ -26508,6 +26508,30 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
2650826508
break;
2650926509

2651026510
case NI_Sve_LoadVector:
26511+
case NI_Sve_LoadVectorByteZeroExtendToInt16:
26512+
case NI_Sve_LoadVectorByteZeroExtendToInt32:
26513+
case NI_Sve_LoadVectorByteZeroExtendToInt64:
26514+
case NI_Sve_LoadVectorByteZeroExtendToUInt16:
26515+
case NI_Sve_LoadVectorByteZeroExtendToUInt32:
26516+
case NI_Sve_LoadVectorByteZeroExtendToUInt64:
26517+
case NI_Sve_LoadVectorInt16SignExtendToInt32:
26518+
case NI_Sve_LoadVectorInt16SignExtendToInt64:
26519+
case NI_Sve_LoadVectorInt16SignExtendToUInt32:
26520+
case NI_Sve_LoadVectorInt16SignExtendToUInt64:
26521+
case NI_Sve_LoadVectorInt32SignExtendToInt64:
26522+
case NI_Sve_LoadVectorInt32SignExtendToUInt64:
26523+
case NI_Sve_LoadVectorSByteSignExtendToInt16:
26524+
case NI_Sve_LoadVectorSByteSignExtendToInt32:
26525+
case NI_Sve_LoadVectorSByteSignExtendToInt64:
26526+
case NI_Sve_LoadVectorSByteSignExtendToUInt16:
26527+
case NI_Sve_LoadVectorSByteSignExtendToUInt32:
26528+
case NI_Sve_LoadVectorSByteSignExtendToUInt64:
26529+
case NI_Sve_LoadVectorUInt16ZeroExtendToInt32:
26530+
case NI_Sve_LoadVectorUInt16ZeroExtendToInt64:
26531+
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt32:
26532+
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt64:
26533+
case NI_Sve_LoadVectorUInt32ZeroExtendToInt64:
26534+
case NI_Sve_LoadVectorUInt32ZeroExtendToUInt64:
2651126535
addr = Op(2);
2651226536
break;
2651326537
#endif // TARGET_ARM64

Diff for: src/coreclr/jit/hwintrinsiclistarm64sve.h

+24
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,30 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32,
3232
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
3333

3434
HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
35+
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
36+
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
37+
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
38+
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
39+
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
40+
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
41+
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
42+
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
43+
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
44+
HARDWARE_INTRINSIC(Sve, LoadVectorInt16SignExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
45+
HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
46+
HARDWARE_INTRINSIC(Sve, LoadVectorInt32SignExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
47+
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
48+
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
49+
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
50+
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
51+
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
52+
HARDWARE_INTRINSIC(Sve, LoadVectorSByteSignExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
53+
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
54+
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
55+
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
56+
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
57+
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
58+
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
3559

3660

3761

0 commit comments

Comments
 (0)