Skip to content

Commit 6f1d8c5

Browse files
ebepho and kunalspathak authored
Arm64/SVE: Implemented ConvertToInt64 and ConvertToUInt64 (#104069)
* Added ConvertToInt32 and ConvertToUInt32 for float inputs. * Added flags to handle only low predicate registers. * Fix whitespace * Remove special codegen flag * Added new test template for operations with different return types. * Add new test template. * Added api for ConvertToInt32 and ConvertToUInt32 for double. * Completed SVE Apis for ConvertToInt64 and ConvertToUInt64. * ConvertToSingle for int and uint. * ConvertToSingle for long and ulong. * Started ConvertToDouble. * Changed Validation Template Test name. * ConvertToInt64. * ConvertToInt64 passes optimized tests. * Added cases for ConvertToSingle and ConvertToDouble. * double or long to 32 bit value. * Removed ConvertToDouble and ConvertToSingle. * Removed more of ConvertToSingle and ConvertToDouble. * all tests pass. * addressed comments. * jit format: * Remove trailing space --------- Co-authored-by: Kunal Pathak <[email protected]>
1 parent 3a294ed commit 6f1d8c5

File tree

10 files changed

+127
-16
lines changed

10 files changed

+127
-16
lines changed

src/coreclr/jit/hwintrinsic.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -1731,6 +1731,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
17311731
{
17321732
case NI_Sve_ConvertToInt32:
17331733
case NI_Sve_ConvertToUInt32:
1734+
case NI_Sve_ConvertToInt64:
1735+
case NI_Sve_ConvertToUInt64:
17341736
// Save the base type of return SIMD. It is used to contain this intrinsic inside
17351737
// ConditionalSelect.
17361738
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(getBaseJitTypeOfSIMDType(sig->retTypeSigClass));

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

+14-3
Original file line numberDiff line numberDiff line change
@@ -511,12 +511,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
511511

512512
// Special handling for ConvertTo* APIs
513513
// Just need to change the opt here.
514+
insOpts embOpt = opt;
514515
switch (intrinEmbMask.id)
515516
{
516517
case NI_Sve_ConvertToInt32:
517518
case NI_Sve_ConvertToUInt32:
518519
{
519-
opt = intrinEmbMask.baseType == TYP_DOUBLE ? INS_OPTS_D_TO_S : INS_OPTS_SCALABLE_S;
520+
embOpt = emitTypeSize(intrinEmbMask.baseType) == EA_8BYTE ? INS_OPTS_D_TO_S
521+
: INS_OPTS_SCALABLE_S;
522+
break;
523+
}
524+
525+
case NI_Sve_ConvertToInt64:
526+
case NI_Sve_ConvertToUInt64:
527+
{
528+
embOpt = emitTypeSize(intrinEmbMask.baseType) == EA_4BYTE ? INS_OPTS_S_TO_D
529+
: INS_OPTS_SCALABLE_D;
520530
break;
521531
}
522532
default:
@@ -555,7 +565,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
555565

556566
// We cannot use `movprfx` here to move falseReg to targetReg because that will
557567
// overwrite the value of embMaskOp1Reg which is present in targetReg.
558-
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, opt);
568+
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg,
569+
embOpt);
559570

560571
GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
561572
falseReg, opt);
@@ -569,7 +580,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
569580
}
570581
}
571582

572-
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, opt);
583+
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, embOpt);
573584
break;
574585
}
575586

src/coreclr/jit/hwintrinsiclistarm64sve.h

+2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ HARDWARE_INTRINSIC(Sve, Compute64BitAddresses,
3232
HARDWARE_INTRINSIC(Sve, Compute8BitAddresses, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
3333
HARDWARE_INTRINSIC(Sve, ConditionalSelect, -1, 3, true, {INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel, INS_sve_sel}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_SupportsContainment)
3434
HARDWARE_INTRINSIC(Sve, ConvertToInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
35+
HARDWARE_INTRINSIC(Sve, ConvertToInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzs, INS_sve_fcvtzs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
3536
HARDWARE_INTRINSIC(Sve, ConvertToUInt32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzu, INS_sve_fcvtzu}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
37+
HARDWARE_INTRINSIC(Sve, ConvertToUInt64, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcvtzu, INS_sve_fcvtzu}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
3638
HARDWARE_INTRINSIC(Sve, Count16BitElements, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cnth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)
3739
HARDWARE_INTRINSIC(Sve, Count32BitElements, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)
3840
HARDWARE_INTRINSIC(Sve, Count64BitElements, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_cntd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasEnumOperand|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed)

src/coreclr/jit/lowerarmarch.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -3390,7 +3390,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
33903390
// For now, make sure that we get here only for intrinsics that we are
33913391
// sure about to rely on auxiliary type's size.
33923392
assert((embOp->GetHWIntrinsicId() == NI_Sve_ConvertToInt32) ||
3393-
(embOp->GetHWIntrinsicId() == NI_Sve_ConvertToUInt32));
3393+
(embOp->GetHWIntrinsicId() == NI_Sve_ConvertToUInt32) ||
3394+
(embOp->GetHWIntrinsicId() == NI_Sve_ConvertToInt64) ||
3395+
(embOp->GetHWIntrinsicId() == NI_Sve_ConvertToUInt64));
33943396

33953397
uint32_t auxSize = genTypeSize(embOp->GetAuxiliaryType());
33963398
if (maskSize == auxSize)

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs

+41
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,26 @@ internal Arm64() { }
869869
public static unsafe Vector<int> ConvertToInt32(Vector<float> value) { throw new PlatformNotSupportedException(); }
870870

871871

872+
/// ConvertToInt64 : Floating-point convert
873+
874+
/// <summary>
875+
/// svint64_t svcvt_s64[_f64]_m(svint64_t inactive, svbool_t pg, svfloat64_t op)
876+
/// FCVTZS Ztied.D, Pg/M, Zop.D
877+
/// svint64_t svcvt_s64[_f64]_x(svbool_t pg, svfloat64_t op)
878+
/// FCVTZS Ztied.D, Pg/M, Ztied.D
879+
/// svint64_t svcvt_s64[_f64]_z(svbool_t pg, svfloat64_t op)
880+
/// </summary>
881+
public static unsafe Vector<long> ConvertToInt64(Vector<double> value) { throw new PlatformNotSupportedException(); }
882+
883+
/// <summary>
884+
/// svint64_t svcvt_s64[_f32]_m(svint64_t inactive, svbool_t pg, svfloat32_t op)
885+
/// FCVTZS Ztied.D, Pg/M, Zop.S
886+
/// svint64_t svcvt_s64[_f32]_x(svbool_t pg, svfloat32_t op)
887+
/// FCVTZS Ztied.D, Pg/M, Ztied.S
888+
/// svint64_t svcvt_s64[_f32]_z(svbool_t pg, svfloat32_t op)
889+
/// </summary>
890+
public static unsafe Vector<long> ConvertToInt64(Vector<float> value) { throw new PlatformNotSupportedException(); }
891+
872892
/// ConvertToUInt32 : Floating-point convert
873893

874894
/// <summary>
@@ -890,6 +910,27 @@ internal Arm64() { }
890910
public static unsafe Vector<uint> ConvertToUInt32(Vector<float> value) { throw new PlatformNotSupportedException(); }
891911

892912

913+
/// ConvertToUInt64 : Floating-point convert
914+
915+
/// <summary>
916+
/// svuint64_t svcvt_u64[_f64]_m(svuint64_t inactive, svbool_t pg, svfloat64_t op)
917+
/// FCVTZU Ztied.D, Pg/M, Zop.D
918+
/// svuint64_t svcvt_u64[_f64]_x(svbool_t pg, svfloat64_t op)
919+
/// FCVTZU Ztied.D, Pg/M, Ztied.D
920+
/// svuint64_t svcvt_u64[_f64]_z(svbool_t pg, svfloat64_t op)
921+
/// </summary>
922+
public static unsafe Vector<ulong> ConvertToUInt64(Vector<double> value) { throw new PlatformNotSupportedException(); }
923+
924+
/// <summary>
925+
/// svuint64_t svcvt_u64[_f32]_m(svuint64_t inactive, svbool_t pg, svfloat32_t op)
926+
/// FCVTZU Ztied.D, Pg/M, Zop.S
927+
/// svuint64_t svcvt_u64[_f32]_x(svbool_t pg, svfloat32_t op)
928+
/// FCVTZU Ztied.D, Pg/M, Ztied.S
929+
/// svuint64_t svcvt_u64[_f32]_z(svbool_t pg, svfloat32_t op)
930+
/// </summary>
931+
public static unsafe Vector<ulong> ConvertToUInt64(Vector<float> value) { throw new PlatformNotSupportedException(); }
932+
933+
893934
/// Count16BitElements : Count the number of 16-bit elements in a vector
894935

895936
/// <summary>

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs

+42
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,27 @@ internal Arm64() { }
926926
public static unsafe Vector<int> ConvertToInt32(Vector<float> value) => ConvertToInt32(value);
927927

928928

929+
/// ConvertToInt64 : Floating-point convert
930+
931+
/// <summary>
932+
/// svint64_t svcvt_s64[_f64]_m(svint64_t inactive, svbool_t pg, svfloat64_t op)
933+
/// FCVTZS Ztied.D, Pg/M, Zop.D
934+
/// svint64_t svcvt_s64[_f64]_x(svbool_t pg, svfloat64_t op)
935+
/// FCVTZS Ztied.D, Pg/M, Ztied.D
936+
/// svint64_t svcvt_s64[_f64]_z(svbool_t pg, svfloat64_t op)
937+
/// </summary>
938+
public static unsafe Vector<long> ConvertToInt64(Vector<double> value) => ConvertToInt64(value);
939+
940+
/// <summary>
941+
/// svint64_t svcvt_s64[_f32]_m(svint64_t inactive, svbool_t pg, svfloat32_t op)
942+
/// FCVTZS Ztied.D, Pg/M, Zop.S
943+
/// svint64_t svcvt_s64[_f32]_x(svbool_t pg, svfloat32_t op)
944+
/// FCVTZS Ztied.D, Pg/M, Ztied.S
945+
/// svint64_t svcvt_s64[_f32]_z(svbool_t pg, svfloat32_t op)
946+
/// </summary>
947+
public static unsafe Vector<long> ConvertToInt64(Vector<float> value) => ConvertToInt64(value);
948+
949+
929950
/// ConvertToUInt32 : Floating-point convert
930951

931952
/// <summary>
@@ -947,6 +968,27 @@ internal Arm64() { }
947968
public static unsafe Vector<uint> ConvertToUInt32(Vector<float> value) => ConvertToUInt32(value);
948969

949970

971+
/// ConvertToUInt64 : Floating-point convert
972+
973+
/// <summary>
974+
/// svuint64_t svcvt_u64[_f64]_m(svuint64_t inactive, svbool_t pg, svfloat64_t op)
975+
/// FCVTZU Ztied.D, Pg/M, Zop.D
976+
/// svuint64_t svcvt_u64[_f64]_x(svbool_t pg, svfloat64_t op)
977+
/// FCVTZU Ztied.D, Pg/M, Ztied.D
978+
/// svuint64_t svcvt_u64[_f64]_z(svbool_t pg, svfloat64_t op)
979+
/// </summary>
980+
public static unsafe Vector<ulong> ConvertToUInt64(Vector<double> value) => ConvertToUInt64(value);
981+
982+
/// <summary>
983+
/// svuint64_t svcvt_u64[_f32]_m(svuint64_t inactive, svbool_t pg, svfloat32_t op)
984+
/// FCVTZU Ztied.D, Pg/M, Zop.S
985+
/// svuint64_t svcvt_u64[_f32]_x(svbool_t pg, svfloat32_t op)
986+
/// FCVTZU Ztied.D, Pg/M, Ztied.S
987+
/// svuint64_t svcvt_u64[_f32]_z(svbool_t pg, svfloat32_t op)
988+
/// </summary>
989+
public static unsafe Vector<ulong> ConvertToUInt64(Vector<float> value) => ConvertToUInt64(value);
990+
991+
950992
/// Count16BitElements : Count the number of 16-bit elements in a vector
951993

952994
/// <summary>

src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs

+4
Original file line numberDiff line numberDiff line change
@@ -4317,8 +4317,12 @@ internal Arm64() { }
43174317

43184318
public static System.Numerics.Vector<int> ConvertToInt32(System.Numerics.Vector<double> value) { throw null; }
43194319
public static System.Numerics.Vector<int> ConvertToInt32(System.Numerics.Vector<float> value) { throw null; }
4320+
public static System.Numerics.Vector<long> ConvertToInt64(System.Numerics.Vector<double> value) { throw null; }
4321+
public static System.Numerics.Vector<long> ConvertToInt64(System.Numerics.Vector<float> value) { throw null; }
43204322
public static System.Numerics.Vector<uint> ConvertToUInt32(System.Numerics.Vector<double> value) { throw null; }
43214323
public static System.Numerics.Vector<uint> ConvertToUInt32(System.Numerics.Vector<float> value) { throw null; }
4324+
public static System.Numerics.Vector<ulong> ConvertToUInt64(System.Numerics.Vector<double> value) { throw null; }
4325+
public static System.Numerics.Vector<ulong> ConvertToUInt64(System.Numerics.Vector<float> value) { throw null; }
43224326

43234327
public static ulong Count16BitElements([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }
43244328
public static ulong Count32BitElements([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; }

0 commit comments

Comments
 (0)