Skip to content

Commit 61fff77

Browse files
rmacnak-googleCommit Queue
authored and committed
[vm, compiler] Add just enough of the vector extension to implement memcpy and memset.
TEST=ci, local qemu Change-Id: I9518049ca927fa42d3c04e9e045c6cec1342c789 Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/462462 Reviewed-by: Alexander Aprelev <[email protected]> Commit-Queue: Ryan Macnak <[email protected]>
1 parent fd94c57 commit 61fff77

File tree

8 files changed

+1008
-19
lines changed

8 files changed

+1008
-19
lines changed

runtime/vm/compiler/assembler/assembler_riscv.cc

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2014,6 +2014,79 @@ void MicroAssembler::ssamoswapd(Register rd,
20142014
}
20152015
#endif // XLEN >= 64
20162016

2017+
// Emits vsetvli. The vtype immediate is assembled from the requested element
// width, length multiplier and tail/mask policies, then encoded as an I-type
// instruction using the OPCFG funct3 under the OPV major opcode.
void MicroAssembler::vsetvli(Register rd,
                             Register rs1,
                             ElementWidth sew,
                             LengthMultiplier lmul,
                             TailMode vta,
                             MaskMode vma) {
  ASSERT(Supports(RV_V));
  // Field placement within the immediate: lmul at bit 0, sew at bit 3,
  // vta at bit 6, vma at bit 7.
  intx_t vtypei = lmul << 0;
  vtypei |= sew << 3;
  vtypei |= vta << 6;
  vtypei |= vma << 7;
  EmitIType(vtypei, rs1, OPCFG, rd, OPV);
}
2027+
2028+
// Emits vle8.v: a vector load into vd using the E8 width encoding. Only the
// base register of the address is encoded, so the offset must be zero.
void MicroAssembler::vle8v(VRegister vd, Address rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(rs1.offset() == 0);
  const uint32_t encoding = EncodeOpcode(LOADFP) | EncodeFunct3(E8) |
                            EncodeVd(vd) | EncodeRs1(rs1.base()) | vm;
  Emit32(encoding);
}
2034+
2035+
// Emits vle16.v: a vector load into vd using the E16 width encoding. Only the
// base register of the address is encoded, so the offset must be zero.
void MicroAssembler::vle16v(VRegister vd, Address rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(rs1.offset() == 0);
  const uint32_t encoding = EncodeOpcode(LOADFP) | EncodeFunct3(E16) |
                            EncodeVd(vd) | EncodeRs1(rs1.base()) | vm;
  Emit32(encoding);
}
2041+
2042+
// Emits vle32.v: a vector load into vd using the E32 width encoding. Only the
// base register of the address is encoded, so the offset must be zero.
void MicroAssembler::vle32v(VRegister vd, Address rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(rs1.offset() == 0);
  const uint32_t encoding = EncodeOpcode(LOADFP) | EncodeFunct3(E32) |
                            EncodeVd(vd) | EncodeRs1(rs1.base()) | vm;
  Emit32(encoding);
}
2048+
2049+
// Emits vle64.v: a vector load into vd using the E64 width encoding. Only the
// base register of the address is encoded, so the offset must be zero.
void MicroAssembler::vle64v(VRegister vd, Address rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(rs1.offset() == 0);
  const uint32_t encoding = EncodeOpcode(LOADFP) | EncodeFunct3(E64) |
                            EncodeVd(vd) | EncodeRs1(rs1.base()) | vm;
  Emit32(encoding);
}
2055+
2056+
// Emits vse8.v: a vector store of vs3 using the E8 width encoding. Only the
// base register of the address is encoded, so the offset must be zero.
void MicroAssembler::vse8v(VRegister vs3, Address rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(rs1.offset() == 0);
  const uint32_t encoding = EncodeOpcode(STOREFP) | EncodeFunct3(E8) |
                            EncodeVs3(vs3) | EncodeRs1(rs1.base()) | vm;
  Emit32(encoding);
}
2062+
2063+
// Emits vse16.v: a vector store of vs3 using the E16 width encoding. Only the
// base register of the address is encoded, so the offset must be zero.
void MicroAssembler::vse16v(VRegister vs3, Address rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(rs1.offset() == 0);
  const uint32_t encoding = EncodeOpcode(STOREFP) | EncodeFunct3(E16) |
                            EncodeVs3(vs3) | EncodeRs1(rs1.base()) | vm;
  Emit32(encoding);
}
2069+
2070+
// Emits vse32.v: a vector store of vs3 using the E32 width encoding. Only the
// base register of the address is encoded, so the offset must be zero.
void MicroAssembler::vse32v(VRegister vs3, Address rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(rs1.offset() == 0);
  const uint32_t encoding = EncodeOpcode(STOREFP) | EncodeFunct3(E32) |
                            EncodeVs3(vs3) | EncodeRs1(rs1.base()) | vm;
  Emit32(encoding);
}
2076+
2077+
// Emits vse64.v: a vector store of vs3 using the E64 width encoding. Only the
// base register of the address is encoded, so the offset must be zero.
void MicroAssembler::vse64v(VRegister vs3, Address rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(rs1.offset() == 0);
  const uint32_t encoding = EncodeOpcode(STOREFP) | EncodeFunct3(E64) |
                            EncodeVs3(vs3) | EncodeRs1(rs1.base()) | vm;
  Emit32(encoding);
}
2083+
2084+
// Emits vmv.v.x vd, rs1 (OPV major opcode, OPIVX funct3, VMV funct6).
//
// vmv.v.x shares its funct6 with vmerge.vxm and is distinguished from it only
// by the mask bit: the move exists solely in the unmasked form. Requesting a
// masked move would therefore silently emit a merge instead, so it is
// rejected up front.
void MicroAssembler::vmvvx(VRegister vd, Register rs1, VectorMask vm) {
  ASSERT(Supports(RV_V));
  ASSERT(vm == unmasked);
  Emit32(EncodeOpcode(OPV) | EncodeVd(vd) | EncodeRs1(rs1) |
         EncodeFunct3(OPIVX) | EncodeFunct6(VMV) | vm);
}
2089+
20172090
void MicroAssembler::lb(Register rd, Address addr, std::memory_order order) {
20182091
ASSERT(addr.offset() == 0);
20192092
ASSERT((order == std::memory_order_acquire) ||

runtime/vm/compiler/assembler/assembler_riscv.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,23 @@ class MicroAssembler : public AssemblerBase {
723723
std::memory_order order = std::memory_order_relaxed);
724724
#endif // XLEN >= 64
725725

726+
// ==== RV32V: Vectors ====
727+
void vsetvli(Register rd,
728+
Register rs1,
729+
ElementWidth sew,
730+
LengthMultiplier lmul,
731+
TailMode vta,
732+
MaskMode vma);
733+
void vle8v(VRegister vd, Address rs1, VectorMask vm = unmasked);
734+
void vle16v(VRegister vd, Address rs1, VectorMask vm = unmasked);
735+
void vle32v(VRegister vd, Address rs1, VectorMask vm = unmasked);
736+
void vle64v(VRegister vd, Address rs1, VectorMask vm = unmasked);
737+
void vse8v(VRegister vs3, Address rs1, VectorMask vm = unmasked);
738+
void vse16v(VRegister vs3, Address rs1, VectorMask vm = unmasked);
739+
void vse32v(VRegister vs3, Address rs1, VectorMask vm = unmasked);
740+
void vse64v(VRegister vs3, Address rs1, VectorMask vm = unmasked);
741+
void vmvvx(VRegister vd, Register rs1, VectorMask vm = unmasked);
742+
726743
// ==== Zalasr: Load-acquire, store-release ====
727744
void lb(Register rd, Address addr, std::memory_order order);
728745
void lh(Register rd, Address addr, std::memory_order order);

runtime/vm/compiler/assembler/assembler_riscv_test.cc

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8479,6 +8479,261 @@ ASSEMBLER_TEST_RUN(DoubleLessOrEqualQuiet, test) {
84798479
EXPECT_EQ(0, CallI(test->entry(), qNAN, -3.0));
84808480
}
84818481

8482+
// Generates a vectorized byte-copy loop. The matching RUN block calls it with
// (destination, source, length), which the code reads from A0, A1 and A2.
ASSEMBLER_TEST_GENERATE(VectorMemoryCopy, assembler) {
  __ SetExtensions(RV_GC | RV_V);

  Label next_chunk;
  __ Bind(&next_chunk);
  // A4 is used below as the number of bytes handled by this iteration.
  __ vsetvli(A4, A2, e8, m8, ta, ma);
  __ vle8v(V0, Address(A1));
  __ add(A1, A1, A4);  // Advance the source pointer.
  __ sub(A2, A2, A4);  // Shrink the remaining length.
  __ vse8v(V0, Address(A0));
  __ add(A0, A0, A4);  // Advance the destination pointer.
  __ bnez(A2, &next_chunk);
  __ ret();
}
8495+
// Checks the encoding of the copy loop and then runs it on a large buffer, a
// single byte, and an empty range.
ASSEMBLER_TEST_RUN(VectorMemoryCopy, test) {
  EXPECT_DISASSEMBLY(
      "0c367757 vsetvli tmp2, a2, e8, m8, ta, ma\n"
      "02058007 vle8.v v0, (a1)\n"
      " 95ba add a1, a1, tmp2\n"
      " 8e19 sub a2, a2, tmp2\n"
      "02050027 vse8.v v0, (a0)\n"
      " 953a add a0, a0, tmp2\n"
      " f67d bnez a2, -18\n"
      " 8082 ret\n");

  const intptr_t byte_count = 1000;
  uint8_t* source = static_cast<uint8_t*>(malloc(byte_count));
  uint8_t* target = static_cast<uint8_t*>(malloc(byte_count));
  // Source holds a repeating 0..255 ramp; target starts out as all 0xFF.
  for (intptr_t index = 0; index < byte_count; index++) {
    source[index] = index & 0xFF;
    target[index] = 0xFF;
  }

  // Full-length copy: source must be untouched and target must match it.
  Call(test->entry(), reinterpret_cast<intx_t>(target),
       reinterpret_cast<intx_t>(source), byte_count);
  for (intptr_t index = 0; index < byte_count; index++) {
    EXPECT_EQ(index & 0xFF, source[index]);
    EXPECT_EQ(index & 0xFF, target[index]);
  }

  // Requested length of a single element.
  target[0] = 0xFF;
  Call(test->entry(), reinterpret_cast<intx_t>(target),
       reinterpret_cast<intx_t>(source), 1);
  EXPECT_EQ(0, target[0]);

  // Requested length of zero: nothing may be written.
  target[0] = 0xFF;
  Call(test->entry(), reinterpret_cast<intx_t>(target),
       reinterpret_cast<intx_t>(source), 0);
  EXPECT_EQ(0xFF, target[0]);

  free(source);
  free(target);
}
8536+
8537+
// Generates a vectorized fill loop for 8-bit elements. The matching RUN block
// calls it with (destination, value, count), read here from A0, A1 and A2.
ASSEMBLER_TEST_GENERATE(VectorMemorySet8, assembler) {
  __ SetExtensions(RV_GC | RV_V);

  Label next_chunk;
  __ Bind(&next_chunk);
  // A4 is used below as the number of elements handled by this iteration.
  __ vsetvli(A4, A2, e8, m8, ta, ma);
  __ vmvvx(V0, A1);
  __ sub(A2, A2, A4);  // Shrink the remaining count.
  __ vse8v(V0, Address(A0));
  __ add(A0, A0, A4);  // One byte per element, so no scaling is needed.
  __ bnez(A2, &next_chunk);
  __ ret();
}
8549+
// Checks the encoding of the 8-bit fill loop and then runs it on a full
// buffer, a single element, and an empty range.
ASSEMBLER_TEST_RUN(VectorMemorySet8, test) {
  EXPECT_DISASSEMBLY(
      "0c367757 vsetvli tmp2, a2, e8, m8, ta, ma\n"
      "5e05c057 vmv.v.x v0, a1\n"
      " 8e19 sub a2, a2, tmp2\n"
      "02050027 vse8.v v0, (a0)\n"
      " 953a add a0, a0, tmp2\n"
      " fa65 bnez a2, -16\n"
      " 8082 ret\n");

  const intptr_t element_count = 100;
  uint8_t* buffer =
      static_cast<uint8_t*>(malloc(element_count * sizeof(uint8_t)));
  for (intptr_t index = 0; index < element_count; index++) {
    buffer[index] = 0;
  }

  // Fill the whole buffer.
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0x12, element_count);
  for (intptr_t index = 0; index < element_count; index++) {
    EXPECT_EQ(0x12u, buffer[index]);
  }

  // Requested length of a single element: the neighbour must stay untouched.
  buffer[0] = 0;
  buffer[1] = 0;
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0x23, 1);
  EXPECT_EQ(0x23u, buffer[0]);
  EXPECT_EQ(0u, buffer[1]);

  // Requested length of zero: nothing may be written.
  buffer[0] = 0;
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0xFF, 0);
  EXPECT_EQ(0u, buffer[0]);

  free(buffer);
}
8584+
8585+
// Generates a vectorized fill loop for 16-bit elements. The matching RUN block
// calls it with (destination, value, count), read here from A0, A1 and A2.
ASSEMBLER_TEST_GENERATE(VectorMemorySet16, assembler) {
  __ SetExtensions(RV_GC | RV_V);

  Label next_chunk;
  __ Bind(&next_chunk);
  // A4 is used below as the number of elements handled by this iteration.
  __ vsetvli(A4, A2, e16, m8, ta, ma);
  __ vmvvx(V0, A1);
  __ sub(A2, A2, A4);  // Shrink the remaining count.
  __ vse16v(V0, Address(A0));
  __ slli(A4, A4, 1);  // Scale the element count to bytes (2 per element).
  __ add(A0, A0, A4);
  __ bnez(A2, &next_chunk);
  __ ret();
}
8598+
// Checks the encoding of the 16-bit fill loop and then runs it on a full
// buffer, a single element, and an empty range.
ASSEMBLER_TEST_RUN(VectorMemorySet16, test) {
  EXPECT_DISASSEMBLY(
      "0cb67757 vsetvli tmp2, a2, e16, m8, ta, ma\n"
      "5e05c057 vmv.v.x v0, a1\n"
      " 8e19 sub a2, a2, tmp2\n"
      "02055027 vse16.v v0, (a0)\n"
      " 0706 slli tmp2, tmp2, 0x1\n"
      " 953a add a0, a0, tmp2\n"
      " f67d bnez a2, -18\n"
      " 8082 ret\n");

  const intptr_t element_count = 100;
  uint16_t* buffer =
      static_cast<uint16_t*>(malloc(element_count * sizeof(uint16_t)));
  for (intptr_t index = 0; index < element_count; index++) {
    buffer[index] = 0;
  }

  // Fill the whole buffer.
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0x1234, element_count);
  for (intptr_t index = 0; index < element_count; index++) {
    EXPECT_EQ(0x1234u, buffer[index]);
  }

  // Requested length of a single element: the neighbour must stay untouched.
  buffer[0] = 0;
  buffer[1] = 0;
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0x2345, 1);
  EXPECT_EQ(0x2345u, buffer[0]);
  EXPECT_EQ(0u, buffer[1]);

  // Requested length of zero: nothing may be written.
  buffer[0] = 0;
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0xFFFF, 0);
  EXPECT_EQ(0u, buffer[0]);

  free(buffer);
}
8634+
8635+
// Generates a vectorized fill loop for 32-bit elements. The matching RUN block
// calls it with (destination, value, count), read here from A0, A1 and A2.
ASSEMBLER_TEST_GENERATE(VectorMemorySet32, assembler) {
  __ SetExtensions(RV_GC | RV_V);

  Label next_chunk;
  __ Bind(&next_chunk);
  // A4 is used below as the number of elements handled by this iteration.
  __ vsetvli(A4, A2, e32, m8, ta, ma);
  __ vmvvx(V0, A1);
  __ sub(A2, A2, A4);  // Shrink the remaining count.
  __ vse32v(V0, Address(A0));
  __ slli(A4, A4, 2);  // Scale the element count to bytes (4 per element).
  __ add(A0, A0, A4);
  __ bnez(A2, &next_chunk);
  __ ret();
}
8648+
// Checks the encoding of the 32-bit fill loop and then runs it on a full
// buffer, a single element, and an empty range.
ASSEMBLER_TEST_RUN(VectorMemorySet32, test) {
  EXPECT_DISASSEMBLY(
      "0d367757 vsetvli tmp2, a2, e32, m8, ta, ma\n"
      "5e05c057 vmv.v.x v0, a1\n"
      " 8e19 sub a2, a2, tmp2\n"
      "02056027 vse32.v v0, (a0)\n"
      " 070a slli tmp2, tmp2, 0x2\n"
      " 953a add a0, a0, tmp2\n"
      " f67d bnez a2, -18\n"
      " 8082 ret\n");

  const intptr_t element_count = 100;
  uint32_t* buffer =
      static_cast<uint32_t*>(malloc(element_count * sizeof(uint32_t)));
  for (intptr_t index = 0; index < element_count; index++) {
    buffer[index] = 0;
  }

  // Fill the whole buffer.
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0x12345678,
       element_count);
  for (intptr_t index = 0; index < element_count; index++) {
    EXPECT_EQ(0x12345678u, buffer[index]);
  }

  // Requested length of a single element: the neighbour must stay untouched.
  buffer[0] = 0;
  buffer[1] = 0;
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0x23456789, 1);
  EXPECT_EQ(0x23456789u, buffer[0]);
  EXPECT_EQ(0u, buffer[1]);

  // Requested length of zero: nothing may be written.
  buffer[0] = 0;
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0xFFFFFFFF, 0);
  EXPECT_EQ(0u, buffer[0]);

  free(buffer);
}
8684+
8685+
#if XLEN >= 64
8686+
// Generates a vectorized fill loop for 64-bit elements. The matching RUN block
// calls it with (destination, value, count), read here from A0, A1 and A2.
ASSEMBLER_TEST_GENERATE(VectorMemorySet64, assembler) {
  __ SetExtensions(RV_GC | RV_V);

  Label next_chunk;
  __ Bind(&next_chunk);
  // A4 is used below as the number of elements handled by this iteration.
  __ vsetvli(A4, A2, e64, m8, ta, ma);
  __ vmvvx(V0, A1);
  __ sub(A2, A2, A4);  // Shrink the remaining count.
  __ vse64v(V0, Address(A0));
  __ slli(A4, A4, 3);  // Scale the element count to bytes (8 per element).
  __ add(A0, A0, A4);
  __ bnez(A2, &next_chunk);
  __ ret();
}
8699+
// Checks the encoding of the 64-bit fill loop and then runs it on a full
// buffer, a single element, and an empty range.
ASSEMBLER_TEST_RUN(VectorMemorySet64, test) {
  EXPECT_DISASSEMBLY(
      "0db67757 vsetvli tmp2, a2, e64, m8, ta, ma\n"
      "5e05c057 vmv.v.x v0, a1\n"
      " 8e19 sub a2, a2, tmp2\n"
      "02057027 vse64.v v0, (a0)\n"
      " 070e slli tmp2, tmp2, 0x3\n"
      " 953a add a0, a0, tmp2\n"
      " f67d bnez a2, -18\n"
      " 8082 ret\n");

  const intptr_t element_count = 100;
  uint64_t* buffer =
      static_cast<uint64_t*>(malloc(element_count * sizeof(uint64_t)));
  for (intptr_t index = 0; index < element_count; index++) {
    buffer[index] = 0;
  }

  // Fill the whole buffer.
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0x1234567812345678,
       element_count);
  for (intptr_t index = 0; index < element_count; index++) {
    EXPECT_EQ(0x1234567812345678u, buffer[index]);
  }

  // Requested length of a single element: the neighbour must stay untouched.
  buffer[0] = 0;
  buffer[1] = 0;
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0x2345678923456789, 1);
  EXPECT_EQ(0x2345678923456789u, buffer[0]);
  EXPECT_EQ(0u, buffer[1]);

  // Requested length of zero: nothing may be written.
  buffer[0] = 0;
  Call(test->entry(), reinterpret_cast<intx_t>(buffer), 0xFFFFFFFF, 0);
  EXPECT_EQ(0u, buffer[0]);

  free(buffer);
}
8735+
#endif
8736+
84828737
ASSEMBLER_TEST_GENERATE(LoadByteAcquire, assembler) {
84838738
__ SetExtensions(RV_GC | RV_Zalasr);
84848739
__ lb(A0, Address(A1), std::memory_order_acquire);

0 commit comments

Comments
 (0)