Skip to content

Commit

Permalink
-cpu/drcbex64.cpp: Fixed handling of address map global mask.
Browse files Browse the repository at this point in the history
* Fixed Final Furlong failing to boot.

-util/mfpresolve.cpp, emu/emumem_aspace.cpp: Moved some member function
 pointer manipulation stuff into the library.  Gives another slight
 reduction in the size of libemu.
  • Loading branch information
cuavas committed Feb 5, 2025
1 parent 4edf86f commit cea7758
Show file tree
Hide file tree
Showing 4 changed files with 267 additions and 68 deletions.
8 changes: 4 additions & 4 deletions src/devices/cpu/drcbex64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2530,7 +2530,7 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst)
// set up a call to the read handler
auto const &accessors = m_memory_accessors[spacesizep.space()];
bool const have_specific = (uintptr_t(nullptr) != accessors.specific.read.function) || accessors.specific.read.is_virtual;
auto const addr_mask = make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits);
offs_t const addr_mask = m_space[spacesizep.space()]->addrmask() & make_bitmask<offs_t>(accessors.specific.address_width) & ~make_bitmask<offs_t>(accessors.specific.native_mask_bits);
mov_reg_param(a, Gpd(REG_PARAM2), addrp);
if (have_specific && ((1 << spacesizep.size()) == accessors.specific.native_bytes))
{
Expand Down Expand Up @@ -2698,7 +2698,7 @@ void drcbe_x64::op_readm(Assembler &a, const instruction &inst)
// set up a call to the read handler
auto const &accessors = m_memory_accessors[spacesizep.space()];
bool const have_specific = (uintptr_t(nullptr) != accessors.specific.read.function) || accessors.specific.read.is_virtual;
auto const addr_mask = make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits);
offs_t const addr_mask = m_space[spacesizep.space()]->addrmask() & make_bitmask<offs_t>(accessors.specific.address_width) & ~make_bitmask<offs_t>(accessors.specific.native_mask_bits);
mov_reg_param(a, Gpd(REG_PARAM2), addrp);
if (spacesizep.size() != SIZE_QWORD)
mov_reg_param(a, Gpd(REG_PARAM3), maskp);
Expand Down Expand Up @@ -2856,7 +2856,7 @@ void drcbe_x64::op_write(Assembler &a, const instruction &inst)
// set up a call to the write handler
auto const &accessors = m_memory_accessors[spacesizep.space()];
bool const have_specific = (uintptr_t(nullptr) != accessors.specific.write.function) || accessors.specific.write.is_virtual;
auto const addr_mask = make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits);
offs_t const addr_mask = m_space[spacesizep.space()]->addrmask() & make_bitmask<offs_t>(accessors.specific.address_width) & ~make_bitmask<offs_t>(accessors.specific.native_mask_bits);
mov_reg_param(a, Gpd(REG_PARAM2), addrp);
if (spacesizep.size() != SIZE_QWORD)
mov_reg_param(a, Gpd(REG_PARAM3), srcp);
Expand Down Expand Up @@ -2986,7 +2986,7 @@ void drcbe_x64::op_writem(Assembler &a, const instruction &inst)
// set up a call to the write handler
auto const &accessors = m_memory_accessors[spacesizep.space()];
bool const have_specific = (uintptr_t(nullptr) != accessors.specific.write.function) || accessors.specific.write.is_virtual;
auto const addr_mask = make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits);
offs_t const addr_mask = m_space[spacesizep.space()]->addrmask() & make_bitmask<offs_t>(accessors.specific.address_width) & ~make_bitmask<offs_t>(accessors.specific.native_mask_bits);
mov_reg_param(a, Gpd(REG_PARAM2), addrp);
if (spacesizep.size() != SIZE_QWORD)
mov_reg_param(a, Gpd(REG_PARAM3), srcp);
Expand Down
54 changes: 10 additions & 44 deletions src/emu/emumem_aspace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
***************************************************************************/

#include "emu.h"
#include <list>
#include <map>
#include "emuopts.h"
#include "debug/debugcpu.h"

#include "emumem_mud.h"
#include "emumem_hea.h"
Expand All @@ -27,6 +23,14 @@
#include "emumem_het.h"
#include "emumem_hws.h"

#include "emuopts.h"
#include "debug/debugcpu.h"

#include "mfpresolve.h"

#include <list>
#include <map>


//**************************************************************************
// DEBUGGING
Expand Down Expand Up @@ -321,49 +325,11 @@ class address_space_specific : public address_space
accessors.low_bits = emu::detail::handler_entry_dispatch_level_to_lowbits(Level, Width, AddrShift);
accessors.read.dispatch = reinterpret_cast<void const *const *>(m_dispatch_read);
accessors.write.dispatch = reinterpret_cast<void const *const *>(m_dispatch_write);
accessors.read.function = accessors.write.function = uintptr_t(nullptr);
accessors.read.displacement = accessors.write.displacement = 0;
accessors.read.is_virtual = accessors.write.is_virtual = false;

auto readfunc = &handler_entry_read<Width, AddrShift>::read;
auto writefunc = &handler_entry_write<Width, AddrShift>::write;
if (MAME_ABI_CXX_TYPE == MAME_ABI_CXX_ITANIUM) {
struct { std::uintptr_t ptr; std::ptrdiff_t adj; } equiv;
constexpr uintptr_t funcmask = ~uintptr_t((MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM) ? 0 : 1);
constexpr unsigned deltashift = (MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM) ? 1 : 0;

assert(sizeof(readfunc) == sizeof(equiv));
*reinterpret_cast<decltype(readfunc) *>(&equiv) = readfunc;
accessors.read.function = equiv.ptr & funcmask;
accessors.read.displacement = equiv.adj >> deltashift;
accessors.read.is_virtual = BIT((MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM) ? equiv.adj : equiv.ptr, 0);

assert(sizeof(writefunc) == sizeof(equiv));
*reinterpret_cast<decltype(writefunc) *>(&equiv) = writefunc;
accessors.write.function = equiv.ptr & funcmask;
accessors.write.displacement = equiv.adj >> deltashift;
accessors.write.is_virtual = BIT((MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM) ? equiv.adj : equiv.ptr, 0);
} else if (MAME_ABI_CXX_TYPE == MAME_ABI_CXX_MSVC) {
struct single { std::uintptr_t entrypoint; };
struct multi { std::uintptr_t entrypoint; int this_delta; };
struct { std::uintptr_t entrypoint; int this_delta; int vptr_offs; int vt_index; } const *unknown;

assert(sizeof(*unknown) >= sizeof(readfunc));
unknown = reinterpret_cast<decltype(unknown)>(&readfunc);
if ((sizeof(*unknown) > sizeof(readfunc)) || !unknown->vt_index) {
accessors.read.function = unknown->entrypoint;
accessors.read.displacement = (sizeof(single) < sizeof(readfunc)) ? unknown->this_delta : 0;
accessors.read.is_virtual = false;
}

assert(sizeof(*unknown) >= sizeof(writefunc));
unknown = reinterpret_cast<decltype(unknown)>(&writefunc);
if ((sizeof(*unknown) > sizeof(writefunc)) || !unknown->vt_index) {
accessors.write.function = unknown->entrypoint;
accessors.write.displacement = (sizeof(single) < sizeof(writefunc)) ? unknown->this_delta : 0;
accessors.write.is_virtual = false;
}
}
std::tie(accessors.read.function, accessors.read.displacement, accessors.read.is_virtual) = util::resolve_member_function(readfunc);
std::tie(accessors.write.function, accessors.write.displacement, accessors.write.is_virtual) = util::resolve_member_function(writefunc);

return accessors;
}
Expand Down
201 changes: 187 additions & 14 deletions src/lib/util/mfpresolve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,43 +62,74 @@ std::pair<std::uintptr_t, std::uintptr_t> resolve_member_function_itanium(
}


// Split the two words of an Itanium-style pointer to member function into
// the (function, displacement, is_virtual) triple without needing an object
// instance.
//
// function - first word of the pointer to member function
// delta    - second word of the pointer to member function
//
// Which word carries the "is virtual" flag in its least significant bit
// depends on MAME_ABI_CXX_ITANIUM_MFP_TYPE: for the ARM variant the flag is
// in delta (and the displacement is delta shifted right by one bit),
// otherwise the flag is in function (and is masked off the returned value).
std::tuple<std::uintptr_t, std::ptrdiff_t, bool> resolve_member_function_itanium(
        std::uintptr_t function,
        std::ptrdiff_t delta) noexcept
{
    if constexpr (MAME_ABI_CXX_ITANIUM_MFP_TYPE == MAME_ABI_CXX_ITANIUM_MFP_ARM)
    {
        // flag lives in bit 0 of delta; function word is passed through untouched
        bool const is_virtual = (delta & 1) != 0;
        std::ptrdiff_t const displacement = delta >> 1;
        return std::make_tuple(function, displacement, is_virtual);
    }
    else
    {
        // flag lives in bit 0 of function; delta is already the displacement
        bool const is_virtual = (function & 1) != 0;
        std::uintptr_t const target = function & ~std::uintptr_t(1);
        return std::make_tuple(target, delta, is_virtual);
    }
}


// Resolve an MSVC-style pointer to member function against a specific object.
//
// funcptr - address of the raw pointer to member function representation
// size    - size of that representation in bytes; this decides which of the
//           optional fields ("this" delta, vptr offset, vtable index) are
//           present
// object  - the object the member function would be called on
//
// Returns the entry point after walking past recognisable thunks, paired with
// the adjusted "this" pointer converted to an integer.
std::pair<std::uintptr_t, std::uintptr_t> resolve_member_function_msvc(
void const *funcptr,
std::size_t size,
void const *object) noexcept
{
struct single { std::uintptr_t entrypoint; };
struct multi { std::uintptr_t entrypoint; int this_delta; };
struct { std::uintptr_t entrypoint; int this_delta; int vptr_offs; int vt_index; } const *unknown;
mfp_msvc_unknown_equiv const *unknown;
assert(sizeof(*unknown) >= size);
unknown = reinterpret_cast<decltype(unknown)>(funcptr);
unknown = reinterpret_cast<mfp_msvc_unknown_equiv const *>(funcptr);

LOG("Input this=%p ", object);
if (sizeof(single) < size)
LOG("thisdelta=%d ", unknown->this_delta);
if (sizeof(*unknown) == size)
LOG("vptrdelta=%d vindex=%d ", unknown->vptr_offs, unknown->vt_index);
if (sizeof(mfp_msvc_single_equiv) < size)
LOG("thisdelta=%d ", unknown->delta);
if (sizeof(mfp_msvc_unknown_equiv) == size)
LOG("vptrdelta=%d vindex=%d ", unknown->voffset, unknown->vindex);
auto byteptr = reinterpret_cast<std::uint8_t const *>(object);

// test for pointer to member function cast across virtual inheritance relationship
if ((sizeof(*unknown) == size) && unknown->vt_index)
if ((sizeof(mfp_msvc_unknown_equiv) == size) && unknown->vindex)
{
// add offset from "this" pointer to location of vptr, and add offset to virtual base from vtable
byteptr += unknown->vptr_offs;
std::uint8_t const *const vptr = *reinterpret_cast<std::uint8_t const *const *>(byteptr);
byteptr += *reinterpret_cast<int const *>(vptr + unknown->vt_index);
byteptr += unknown->voffset;
auto const vptr = *reinterpret_cast<std::uint8_t const *const *>(byteptr);
byteptr += *reinterpret_cast<int const *>(vptr + unknown->vindex);
}

// add "this" pointer displacement if present in the pointer to member function
if (sizeof(single) < size)
byteptr += unknown->this_delta;
if (sizeof(mfp_msvc_single_equiv) < size)
byteptr += unknown->delta;
LOG("Calculated this=%p\n", reinterpret_cast<void const *>(byteptr));

// walk past recognisable thunks
return std::make_pair(bypass_member_function_thunks(unknown->entrypoint, byteptr), std::uintptr_t(byteptr));
}


// Decompose an MSVC-style pointer to member function into a
// (function, displacement, is_virtual) triple without needing an object
// instance.
//
// funcptr - address of the raw pointer to member function representation
// size    - size of that representation in bytes; this decides whether the
//           "this" delta and the virtual inheritance fields are present
//
// When the representation carries a non-zero vindex (a pointer to member
// function cast across a virtual inheritance relationship) the result cannot
// be expressed in this form, so a null function, zero displacement and
// is_virtual = false are returned.
std::tuple<std::uintptr_t, std::ptrdiff_t, bool> resolve_member_function_msvc(
        void const *funcptr,
        std::size_t size) noexcept
{
    assert(sizeof(mfp_msvc_unknown_equiv) >= size);
    auto const mfp = reinterpret_cast<mfp_msvc_unknown_equiv const *>(funcptr);

    // work out which optional fields this representation actually contains
    bool const has_this_delta = sizeof(mfp_msvc_single_equiv) < size;
    bool const has_virtual_fields = sizeof(mfp_msvc_unknown_equiv) == size;

    // bail out if the virtual base adjustment would be needed
    if (has_virtual_fields && mfp->vindex)
    {
        return std::make_tuple(
                std::uintptr_t(static_cast<void (*)()>(nullptr)),
                std::ptrdiff_t(0),
                false);
    }

    // only read the delta field when it is really part of the representation
    std::ptrdiff_t displacement = 0;
    if (has_this_delta)
        displacement = mfp->delta;

    std::pair<std::uintptr_t, bool> const target = bypass_member_function_thunks(mfp->entrypoint);
    return std::make_tuple(target.first, displacement, target.second);
}


std::uintptr_t bypass_member_function_thunks(
std::uintptr_t entrypoint,
void const *object) noexcept
Expand Down Expand Up @@ -255,4 +286,146 @@ std::uintptr_t bypass_member_function_thunks(
#endif
}


// Walk past recognisable jump thunks starting at entrypoint, without
// reference to any particular object instance.
//
// entrypoint - address of the code to examine
//
// Returns a (value, is_virtual) pair:
// * If a virtual member function call thunk is recognised, is_virtual is
//   true and value is the displacement encoded in the thunk's indirect
//   load/jump (zero if the thunk encodes no displacement).
// * Otherwise is_virtual is false and value is the address reached after
//   following any plain jumps that were recognised.
// On architectures other than x86-64 and AArch64, entrypoint is returned
// unchanged with is_virtual false.
std::pair<std::uintptr_t, bool> bypass_member_function_thunks(
std::uintptr_t entrypoint) noexcept
{
#if defined(__x86_64__) || defined(_M_X64)
std::uint8_t const *func = reinterpret_cast<std::uint8_t const *>(entrypoint);
while (true)
{
// Assumes Windows calling convention, and doesn't consider that
// the "this" pointer could be in RDX if RCX is a pointer to
// space for an oversize scalar result. Since the result area
// is uninitialised on entry, you won't see something that looks
// like a vtable dispatch through RCX in this case - it won't
// behave badly, it just won't bypass virtual call thunks in the
// rare situations where the return type is an oversize scalar.
if (0xe9 == func[0])
{
// relative jump with 32-bit displacement (typically a resolved PLT entry)
LOG("Found relative jump at %p ", func);
// displacement is relative to the end of the five-byte instruction
func += std::ptrdiff_t(5) + *reinterpret_cast<std::int32_t const *>(func + 1);
LOG("redirecting to %p\n", func);
continue;
}
else if ((0x48 == func[0]) && (0x8b == func[1]) && (0x01 == func[2]))
{
// starts with mov rax,QWORD PTR [rcx] - check what follows the load
if ((0xff == func[3]) && ((0x20 == func[4]) || (0x60 == func[4]) || (0xa0 == func[4])))
{
// MSVC virtual function call thunk - mov rax,QWORD PTR [rcx] ; jmp QWORD PTR [rax+...]
LOG("Found virtual member function thunk at %p\n", func);
if (0x20 == func[4]) // no displacement
return std::make_pair(std::uintptr_t(0), true);
else if (0x60 == func[4]) // 8-bit displacement
return std::make_pair(std::uintptr_t(*reinterpret_cast<std::int8_t const *>(func + 5)), true);
else // 32-bit displacement
return std::make_pair(std::uintptr_t(*reinterpret_cast<std::int32_t const *>(func + 5)), true);
}
else if ((0x48 == func[3]) && (0x8b == func[4]))
{
// clang virtual function call thunk - mov rax,QWORD PTR [rcx] ; mov rax,QWORD PTR [rax+...] ; jmp rax
// the position of the trailing jmp rax depends on the displacement size
if ((0x00 == func[5]) && (0x48 == func[6]) && (0xff == func[7]) && (0xe0 == func[8]))
{
// no displacement
LOG("Found virtual member function thunk at %p\n", func);
return std::make_pair(std::uintptr_t(0), true);
}
else if ((0x40 == func[5]) && (0x48 == func[7]) && (0xff == func[8]) && (0xe0 == func[9]))
{
// 8-bit displacement
LOG("Found virtual member function thunk at %p\n", func);
return std::make_pair(std::uintptr_t(*reinterpret_cast<std::int8_t const *>(func + 6)), true);
}
else if ((0x80 == func[5]) && (0x48 == func[10]) && (0xff == func[11]) && (0xe0 == func[12]))
{
// 32-bit displacement
LOG("Found virtual member function thunk at %p\n", func);
return std::make_pair(std::uintptr_t(*reinterpret_cast<std::int32_t const *>(func + 6)), true);
}
}
}

// clang uses unoptimised thunks if optimisation is disabled
// Without optimisation, clang produces thunks like:
// 50 push rax
// 48 89 0c 24 mov QWORD PTR [rsp],rcx
// 48 8b 0c 24 mov rcx,QWORD PTR [rsp]
// 48 8b 01 mov rax,QWORD PTR [rcx]
// 48 8b 80 xx xx xx xx mov rax,QWORD PTR [rax+...]
// 41 5a pop r10
// 48 ff e0 jmp rax
// Trying to decode these thunks likely isn't worth the effort.
// Chasing performance in unoptimised builds isn't very useful,
// and the format of these thunks may be fragile.

// not something we can easily bypass
break;
}
// no virtual call thunk found - report the address we ended up at
return std::make_pair(std::uintptr_t(func), false);
#elif defined(__aarch64__) || defined(_M_ARM64)
std::uint32_t const *func = reinterpret_cast<std::uint32_t const *>(entrypoint);
// fetch the instruction word at the given index relative to the current position
auto const fetch = [&func] (auto offset) { return little_endianize_int32(func[offset]); };
while (true)
{
if ((0x90000010 == (fetch(0) & 0x9f00001f)) && (0x91000210 == (fetch(1) & 0xffc003ff)) && (0xd61f0200 == fetch(2)))
{
// page-relative jump with +/-4GB reach - adrp xip0,... ; add xip0,xip0,#... ; br xip0
LOG("Found page-relative jump at %p ", func);
// reassemble the adrp immediate (low two bits from bits 29-30, upper bits
// from bits 5-23) as a byte offset in units of 4KB pages, sign-extended
std::int64_t const page =
(std::uint64_t(fetch(0) & 0x60000000) >> 17) |
(std::uint64_t(fetch(0) & 0x00ffffe0) << 9) |
((fetch(0) & 0x00800000) ? (~std::uint64_t(0) << 33) : 0);
// 12-bit immediate from the add instruction gives the offset within the page
std::uint32_t const offset = (fetch(1) & 0x003ffc00) >> 10;
func = reinterpret_cast<std::uint32_t const *>(((std::uintptr_t(func) + page) & (~std::uintptr_t(0) << 12)) + offset);
LOG("redirecting to %p\n", func);
}
else if ((0xf9400010 == fetch(0)) && (0xf9400210 == (fetch(1) & 0xffc003ff)) && (0xd61f0200 == fetch(2)))
{
// virtual function call thunk - ldr xip0,[x0] ; ldr xip0,[xip0,#...] ; br xip0
LOG("Found virtual member function thunk at %p\n", func);
// the immediate in bits 10-21 is in units of eight bytes, so shifting
// right by seven rather than ten yields the byte offset
return std::make_pair(std::uintptr_t((fetch(1) & 0x003ffc00) >> (10 - 3)), true);
}
else
{
// not something we can easily bypass
break;
}

// clang uses horribly sub-optimal thunks for AArch64
// Without optimisation, clang produces thunks like:
// d10143ff sub sp,sp,#80
// f90027e7 str x7,[sp,#72]
// f90023e6 str x6,[sp,#64]
// f9001fe5 str x5,[sp,#56]
// f9001be4 str x4,[sp,#48]
// f90017e3 str x3,[sp,#40]
// f90013e2 str x2,[sp,#32]
// f9000fe1 str x1,[sp,#24]
// f90007e0 str x0,[sp,#8]
// f94007e0 ldr x0,[sp,#8]
// f9400009 ldr x9,[x0]
// f9400129 ldr x9,[x9,#...]
// 910143ff add sp,sp,#80
// d61f0120 br x9
// With optimisation, clang produces thunks like:
// d10103ff sub sp,sp,#64
// a9008be1 stp x1,x2,[sp,#8]
// a90193e3 stp x3,x4,[sp,#24]
// a9029be5 stp x5,x6,[sp,#40]
// f9001fe7 str x7,[sp,#56]
// f9400009 ldr x9,[x0]
// f9400129 ldr x9,[x9,#...]
// 910103ff add sp,sp,#64
// d61f0120 br x9
// It's more effort than it's worth to try decoding these
// thunks.

}
// no virtual call thunk found - report the address we ended up at
return std::make_pair(std::uintptr_t(func), false);
#else
// unsupported architecture - no thunk recognition is attempted
return std::make_pair(entrypoint, false);
#endif
}

} // namespace util::detail
Loading

0 comments on commit cea7758

Please sign in to comment.