diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 3844d96a7fa077..6bfd9c34c578f0 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -8601,6 +8601,25 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D); theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 31, INS_OPTS_SCALABLE_D); theEmitter->emitIns_R_R_I(INS_sve_usra, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D); + + // IF_SVE_BX_2A + // DUPQ <Zd>.<T>, <Zn>.<T>[<imm>] + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V21, REG_V10, 10, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V31, REG_V31, 15, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V21, REG_V10, 5, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V21, REG_V10, 2, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V31, REG_V31, 3, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_dupq, EA_SCALABLE, REG_V31, REG_V31, 1, INS_OPTS_SCALABLE_D); + + // IF_SVE_BY_2A + // EXTQ <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_extq, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_extq, EA_SCALABLE, REG_V31, REG_V31, 15, INS_OPTS_SCALABLE_B); } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git 
a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5a98ac3d07e279..2997de348e4370 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2626,6 +2626,48 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isScalableVectorSize(elemsize)); +#ifdef DEBUG + switch (id->idInsOpt()) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimm4(imm)); + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimm3(imm)); + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm2(imm)); + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidImm1(imm)); + break; + + default: + break; + } +#endif // DEBUG + break; + + case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isScalableVectorSize(elemsize)); + assert(isValidUimm4(imm)); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -9939,6 +9981,48 @@ void emitter::emitIns_R_R_I(instruction ins, fmt = IF_SVE_FU_2A; break; + case INS_sve_dupq: + assert(insOptsScalableStandard(opt)); + assert(insScalableOptsNone(sopt)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isScalableVectorSize(size)); +#ifdef DEBUG + switch (opt) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimm4(imm)); + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimm3(imm)); + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm2(imm)); + break; + + case INS_OPTS_SCALABLE_D: + 
assert(isValidImm1(imm)); + break; + + default: + break; + } +#endif // DEBUG + fmt = IF_SVE_BX_2A; + break; + + case INS_sve_extq: + assert(opt == INS_OPTS_SCALABLE_B); + assert(insScalableOptsNone(sopt)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isScalableVectorSize(size)); + assert(isValidUimm4(imm)); + fmt = IF_SVE_BY_2A; + break; + default: unreached(); break; @@ -19056,6 +19140,49 @@ void emitter::emitIns_Call(EmitCallType callType, return (encoding | (code_t)(imm << 16)); } +/***************************************************************************** + * + * Returns the encoding for the field 'i1:tsz' at bit locations '20:19-16'. A single marker bit selects the element size (bit 16 = B, 17 = H, 18 = S, 19 = D) and 'imm' occupies the bits above that marker, up to bit 20. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsizeWithImmediate_i1_tsz(const insOpts opt, ssize_t imm) +{ + code_t encoding = 0; + + switch (opt) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimm4(imm)); + encoding |= (1 << 16); // bit 16 marks INS_OPTS_SCALABLE_B + encoding |= (imm << 17); // imm in bits 20-17 + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimm3(imm)); + encoding |= (1 << 17); // bit 17 marks INS_OPTS_SCALABLE_H + encoding |= (imm << 18); // imm in bits 20-18 + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm2(imm)); + encoding |= (1 << 18); // bit 18 marks INS_OPTS_SCALABLE_S + encoding |= (imm << 19); // imm in bits 20-19 + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidImm1(imm)); + encoding |= (1 << 19); // bit 19 marks INS_OPTS_SCALABLE_D + encoding |= (imm << 20); // imm in bit 20 + break; + + default: + assert(!"Invalid size for vector register"); + break; + } + + return encoding; +} + /***************************************************************************** * * Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. @@ -21140,6 +21267,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 14; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. 
+ */ + +/*static*/ emitter::code_t emitter::insEncodeUimm4_19_to_16(ssize_t imm) +{ + assert(isValidUimm4(imm)); + return (code_t)imm << 16; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 4-bits starting from 1, at bit locations '19-16'. @@ -24949,6 +25087,24 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeSveElemsizeWithImmediate_i1_tsz(id->idInsOpt(), imm); // ixxxx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // mmmmm + code |= insEncodeUimm4_19_to_16(imm); // iiii + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -28867,6 +29023,23 @@ void emitter::emitDispInsHelp( emitDispImm(imm, false); break; + // <Zd>.<T>, <Zn>.<T>[<imm>] + case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), false); + emitDispElementIndex(imm, false); + break; + + // <Zdn>.B, <Zdn>.B, <Zm>.B, #<imm> + case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); + emitDispImm(imm, false); + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); 
assert(!"unexpectedFormat"); @@ -32954,6 +33127,16 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_4C; break; + case IF_SVE_BX_2A: // ...........ixxxx ......nnnnnddddd -- sve_int_perm_dupq_i + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case IF_SVE_BY_2A: // ............iiii ......mmmmmddddd -- sve_int_perm_extq + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index a5dafc0c12dfd1..b663149b194b8d 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -547,6 +547,9 @@ static code_t insEncodeSveElemsize_tszh_tszl_and_imm(const insOpts opt, const ss // Returns the encoding for the field 'tszh:tszl:imm3' at bit locations '23-22:20-19:18-16'. static code_t insEncodeSveElemsizeWithShift_tszh_tszl_imm3(const insOpts opt, ssize_t imm, bool isRightShift); +// Returns the encoding for the field 'i1:tsz' at bit locations '20:19-16'. +static code_t insEncodeSveElemsizeWithImmediate_i1_tsz(const insOpts opt, ssize_t imm); + // Returns the encoding to select the constant values 90 or 270 for an Arm64 SVE vector instruction // This specifically encode the field 'rot' at bit location '16'. static code_t insEncodeSveImm90_or_270_rot(ssize_t imm); @@ -669,6 +672,9 @@ static code_t insEncodeImm1_22(ssize_t imm); // Returns the encoding for the immediate value as 7-bits at bit locations '20-14'. static code_t insEncodeUimm7_20_to_14(ssize_t imm); +// Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. 
+static code_t insEncodeUimm4_19_to_16(ssize_t imm); + // Returns the encoding for the immediate value as 4-bits starting from 1, at bit locations '19-16'. static code_t insEncodeUimm4From1_19_to_16(ssize_t imm);