diff --git a/src/devices/cpu/drcbearm64.cpp b/src/devices/cpu/drcbearm64.cpp index 2f5c9eec02bbd..a70e00b37079f 100644 --- a/src/devices/cpu/drcbearm64.cpp +++ b/src/devices/cpu/drcbearm64.cpp @@ -196,21 +196,21 @@ class ThrowableErrorHandler : public ErrorHandler // helper functions -a64::Vec select_register(a64::Vec const ®, uint32_t regsize) +inline a64::Vec select_register(a64::Vec const ®, uint32_t regsize) { if (regsize == 4) return reg.s(); return reg.d(); } -a64::Gp select_register(a64::Gp const ®, uint32_t regsize) +inline a64::Gp select_register(a64::Gp const ®, uint32_t regsize) { if (regsize == 4) return reg.w(); return reg.x(); } -bool is_valid_immediate_mask(uint64_t val, size_t bytes) +inline bool is_valid_immediate_mask(uint64_t val, size_t bytes) { // all zeros and all ones aren't allowed, and disallow any value with bits outside of the max bit range if (val == 0 || val == make_bitmask(bytes * 8)) @@ -231,18 +231,28 @@ bool is_valid_immediate_mask(uint64_t val, size_t bytes) return population_count_64(val) == head - tail; } -bool is_valid_immediate(uint64_t val, size_t bits) +inline bool is_valid_immediate(uint64_t val, size_t bits) { assert(bits < 64); return val < (uint64_t(1) << bits); } -bool is_valid_immediate_signed(int64_t val, size_t bits) +inline constexpr bool is_valid_immediate_signed(int64_t val, size_t bits) { return util::sext(val, bits) == val; } -bool emit_add_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val) +inline constexpr bool is_valid_offset(int64_t diff, int max_shift) +{ + if (is_valid_immediate_signed(diff, 9)) + return true; // 9-bit signed offset + else if ((diff >= 0) && (diff < (1 << (12 + max_shift))) && !(diff & make_bitmask(max_shift))) + return true; // 12-bit unsigned offset shifted by operand size + else + return false; +} + +inline bool emit_add_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val) { // If the bottom 12 bits are 0s then an optimized form can be used if the remaining bits are <= 12 if (is_valid_immediate(val, 12) || ((val & 0xfff) == 0 && is_valid_immediate(val >> 12, 12))) @@ -254,7 +264,7 @@ bool emit_add_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &sr return false; } -bool emit_sub_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val) +inline bool emit_sub_optimized(a64::Assembler &a, const a64::Gp &dst, const a64::Gp &src, int64_t val) { if (val < 0) val = -val; @@ -509,11 +519,11 @@ void drcbe_arm64::get_imm_relative(a64::Assembler &a, const a64::Gp ®, const a.mov(reg, val); } -void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg ®, const void *ptr) const +inline void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg ®, int max_shift, const void *ptr) const { - // If it can fit as a constant offset + // If it can fit as an immediate offset const int64_t diff = (int64_t)ptr - (int64_t)m_baseptr; - if (is_valid_immediate_signed(diff, 9)) + if (is_valid_offset(diff, max_shift)) { a.emit(opcode, reg, arm::Mem(BASE_REG, diff)); return; @@ -547,31 +557,30 @@ void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, return; } - if (diff >= 0) + // If it's in a nearby page + const uint64_t pagebase = codeoffs & ~make_bitmask(12); + const int64_t pagerel = (int64_t)ptr - pagebase; + if (is_valid_immediate_signed(pagerel, 21 + 12)) { - int shift = 0; - int max_shift = 0; + const uint64_t targetpage = (uint64_t)ptr & ~make_bitmask(12); + const uint64_t pageoffs = (uint64_t)ptr & util::make_bitmask(12); - if (opcode == a64::Inst::kIdLdrb || opcode == a64::Inst::kIdLdrsb) - max_shift = 0; - else if (opcode == a64::Inst::kIdLdrh || opcode == a64::Inst::kIdLdrsh) - max_shift = 1; - else if (opcode == a64::Inst::kIdLdrsw) - max_shift = 2; + a.adrp(MEM_SCRATCH_REG, targetpage); + if (is_valid_offset(pageoffs, max_shift)) + { + a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG, pageoffs)); + } else - max_shift = (reg.isGpW() || reg.isVecS()) ? 2 : 3; - - for (int i = 0; i < 64 && max_shift > 0; i++) { - if ((uint64_t)ptr & ((uint64_t)(1) << i)) - { - shift = i; - break; - } + a.add(MEM_SCRATCH_REG, MEM_SCRATCH_REG, pageoffs); + a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG)); } + return; + } - if (shift > max_shift) - shift = max_shift; + if (diff >= 0) + { + const int shift = (diff & make_bitmask(max_shift)) ? 0 : max_shift; if (is_valid_immediate(diff >> shift, 32)) { @@ -586,44 +595,23 @@ void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, } } - const uint64_t pagebase = codeoffs & ~make_bitmask(12); - const int64_t pagerel = (int64_t)ptr - pagebase; - if (is_valid_immediate_signed(pagerel, 21 + 12)) - { - const uint64_t targetpage = (uint64_t)ptr & ~make_bitmask(12); - const uint64_t pageoffs = (uint64_t)ptr & util::make_bitmask(12); - - a.adrp(MEM_SCRATCH_REG, targetpage); - - if (is_valid_immediate_signed(pageoffs, 9)) - { - a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG, pageoffs)); - return; - } - else if (emit_add_optimized(a, MEM_SCRATCH_REG, MEM_SCRATCH_REG, pageoffs)) - { - a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG)); - return; - } - } - // Can't optimize it at all, most likely becomes 4 MOV commands a.mov(MEM_SCRATCH_REG, ptr); a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG)); } -void drcbe_arm64::emit_ldr_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr, reg, ptr); } -void drcbe_arm64::emit_ldrb_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrb, reg, ptr); } -void drcbe_arm64::emit_ldrh_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrh, reg, ptr); } -void drcbe_arm64::emit_ldrsb_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsb, reg, ptr); } -void drcbe_arm64::emit_ldrsh_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsh, reg, ptr); } -void drcbe_arm64::emit_ldrsw_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsw, reg, ptr); } -void drcbe_arm64::emit_str_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr, reg, ptr); } -void drcbe_arm64::emit_strb_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrb, reg, ptr); } -void drcbe_arm64::emit_strh_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrh, reg, ptr); } +void drcbe_arm64::emit_ldr_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr, reg, reg.isGpW() ? 2 : 3, ptr); } +void drcbe_arm64::emit_ldrb_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrb, reg, 0, ptr); } +void drcbe_arm64::emit_ldrh_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrh, reg, 1, ptr); } +void drcbe_arm64::emit_ldrsb_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsb, reg, 0, ptr); } +void drcbe_arm64::emit_ldrsh_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsh, reg, 1, ptr); } +void drcbe_arm64::emit_ldrsw_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdrsw, reg, 2, ptr); } +void drcbe_arm64::emit_str_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr, reg, reg.isGpW() ? 2 : 3, ptr); } +void drcbe_arm64::emit_strb_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrb, reg, 0, ptr); } +void drcbe_arm64::emit_strh_mem(a64::Assembler &a, const a64::Gp ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStrh, reg, 1, ptr); } -void drcbe_arm64::emit_float_ldr_mem(a64::Assembler &a, const a64::Vec ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr_v, reg, ptr); } -void drcbe_arm64::emit_float_str_mem(a64::Assembler &a, const a64::Vec ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr_v, reg, ptr); } +void drcbe_arm64::emit_float_ldr_mem(a64::Assembler &a, const a64::Vec ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdLdr_v, reg, reg.isVecS() ? 2 : 3, ptr); } +void drcbe_arm64::emit_float_str_mem(a64::Assembler &a, const a64::Vec ®, const void *ptr) const { emit_ldr_str_base_mem(a, a64::Inst::kIdStr_v, reg, reg.isVecS() ? 2 : 3, ptr); } void drcbe_arm64::mov_reg_param(a64::Assembler &a, uint32_t regsize, const a64::Gp &dst, const be_parameter &src) const { @@ -803,7 +791,7 @@ void drcbe_arm64::call_arm_addr(a64::Assembler &a, const void *offs) const { const uint64_t codeoffs = a.code()->baseAddress() + a.offset(); const int64_t reloffs = (int64_t)offs - codeoffs; - if (is_valid_immediate_signed(reloffs, 26)) + if (is_valid_immediate_signed(reloffs, 26 + 2)) { a.bl(offs); } diff --git a/src/devices/cpu/drcbearm64.h b/src/devices/cpu/drcbearm64.h index 258d2947d3e68..6748226b96246 100644 --- a/src/devices/cpu/drcbearm64.h +++ b/src/devices/cpu/drcbearm64.h @@ -169,7 +169,7 @@ class drcbe_arm64 : public drcbe_interface // helper functions void get_imm_relative(asmjit::a64::Assembler &a, const asmjit::a64::Gp ®, const uint64_t ptr) const; - void emit_ldr_str_base_mem(asmjit::a64::Assembler &a, asmjit::a64::Inst::Id opcode, const asmjit::a64::Reg ®, const void *ptr) const; + void emit_ldr_str_base_mem(asmjit::a64::Assembler &a, asmjit::a64::Inst::Id opcode, const asmjit::a64::Reg ®, int max_shift, const void *ptr) const; void emit_ldr_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp ®, const void *ptr) const; void emit_ldrb_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp ®, const void *ptr) const; void emit_ldrh_mem(asmjit::a64::Assembler &a, const asmjit::a64::Gp ®, const void *ptr) const;