Merge pull request #4285 from neobrain/fix_ptso_offsets
Fix crashes in Paranoid TSO mode
Sonicadvance1 authored Jan 21, 2025
2 parents 840f306 + 32c75f0 commit 56c95e3
Showing 3 changed files with 66 additions and 11 deletions.
4 changes: 4 additions & 0 deletions FEXCore/Source/Interface/Context/Context.h
@@ -337,6 +337,10 @@ class ContextImpl final : public FEXCore::Context::Context {
AtomicTSOEmulationEnabled = false;
VectorAtomicTSOEmulationEnabled = false;
MemcpyAtomicTSOEmulationEnabled = false;
} else if (Config.ParanoidTSO) {
AtomicTSOEmulationEnabled = true;
VectorAtomicTSOEmulationEnabled = true;
MemcpyAtomicTSOEmulationEnabled = true;
} else {
// Atomic TSO emulation only enabled if the config option is enabled.
AtomicTSOEmulationEnabled = (IsMemoryShared || !Config.TSOAutoMigration) && Config.TSOEnabled;
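
This hunk gives ParanoidTSO priority over the per-category TSO settings. A condensed, hypothetical view of the resulting decision chain (the first condition is elided in the diff and appears here only as a placeholder):

// Hypothetical sketch of the flag-selection order after this change.
// "TSODisabledEntirely" stands in for the elided first condition of the chain.
if (TSODisabledEntirely) {
  // All three emulation flags stay off.
} else if (Config.ParanoidTSO) {
  // ParanoidTSO wins: atomic, vector, and memcpy TSO emulation are all forced on.
} else {
  // Otherwise each category follows its own config option, e.g.:
  AtomicTSOEmulationEnabled = (IsMemoryShared || !Config.TSOAutoMigration) && Config.TSOEnabled;
}
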
4 changes: 4 additions & 0 deletions FEXCore/Source/Interface/Core/JIT/JITClass.h
@@ -245,6 +245,10 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {
ARMEmitter::ExtendedMemOperand GenerateMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
IR::MemOffsetType OffsetType, uint8_t OffsetScale);

[[nodiscard]]
ARMEmitter::Register ApplyMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, ARMEmitter::Register Tmp,
IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale);

// NOTE: Will use TMP1 as a way to encode immediates that happen to fall outside
// the limits of the scalar plus immediate variant of SVE load/stores.
//
69 changes: 58 additions & 11 deletions FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp
@@ -591,6 +591,44 @@ ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(
FEX_UNREACHABLE;
}

ARMEmitter::Register Arm64JITCore::ApplyMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, ARMEmitter::Register Tmp,
IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale) {
if (Offset.IsInvalid()) {
return Base;
}

if (OffsetScale != 1 && OffsetScale != IR::OpSizeToSize(AccessSize)) {
LOGMAN_MSG_A_FMT("Unhandled OffsetScale: {}", OffsetScale);
}

uint64_t Const;
if (IsInlineConstant(Offset, &Const)) {
if (Const == 0) {
return Base;
}
LoadConstant(ARMEmitter::Size::i64Bit, Tmp, Const);
add(ARMEmitter::Size::i64Bit, Tmp, Base, Tmp, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(OffsetScale));
} else {
auto RegOffset = GetReg(Offset.ID());
switch (OffsetType.Val) {
case IR::MEM_OFFSET_SXTX.Val:
add(ARMEmitter::Size::i64Bit, Tmp, Base, RegOffset, ARMEmitter::ExtendedType::SXTX, FEXCore::ilog2(OffsetScale));
break;

case IR::MEM_OFFSET_UXTW.Val:
add(ARMEmitter::Size::i64Bit, Tmp, Base, RegOffset, ARMEmitter::ExtendedType::UXTW, FEXCore::ilog2(OffsetScale));
break;

case IR::MEM_OFFSET_SXTW.Val:
add(ARMEmitter::Size::i64Bit, Tmp, Base, RegOffset, ARMEmitter::ExtendedType::SXTW, FEXCore::ilog2(OffsetScale));
break;

default: LOGMAN_MSG_A_FMT("Unhandled OffsetType: {}", OffsetType.Val); break;
}
}
return Tmp;
}
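
For context, the ParanoidLoadMemTSO and ParanoidStoreMemTSO hunks further down call this new helper to fold an optional offset into a temporary before issuing the ordered load or store. A minimal sketch of the call pattern, condensed from the GPR load path below (the trailing ldarb is just one of the size cases):

// Condensed usage sketch, following the ParanoidLoadMemTSO GPR path below.
const auto Dst = GetReg(Node);
auto MemReg = GetReg(Op->Addr.ID());
// ApplyMemOperand returns Base unchanged when there is no offset; otherwise it
// materializes Base + (Offset << ilog2(OffsetScale)) into TMP4 and returns TMP4.
MemReg = ApplyMemOperand(OpSize, MemReg, TMP4, Op->Offset, Op->OffsetType, Op->OffsetScale);
ldarb(Dst, MemReg); // 8-bit acquire load from the resolved address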

ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
IR::MemOffsetType OffsetType, [[maybe_unused]] uint8_t OffsetScale) {
if (Offset.IsInvalid()) {
@@ -1788,7 +1826,6 @@ DEF_OP(MemSet) {
case 8: stlr(Value.X(), TMP2); break;
default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
}
nop();
}

if (Size >= 0) {
@@ -1993,47 +2030,47 @@ DEF_OP(MemCpy) {
ldaprb(TMP4.W(), TMP3);
stlrb(TMP4.W(), TMP2);
} else {
nop();
switch (OpSize) {
case 2: ldaprh(TMP4.W(), TMP3); break;
case 4: ldapr(TMP4.W(), TMP3); break;
case 8: ldapr(TMP4, TMP3); break;
default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
}
nop();

// Placeholders for backpatching barriers (one per load/store)
nop();
nop();

switch (OpSize) {
case 2: stlrh(TMP4.W(), TMP2); break;
case 4: stlr(TMP4.W(), TMP2); break;
case 8: stlr(TMP4, TMP2); break;
default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
}
nop();
}
} else {
if (OpSize == 1) {
// 8bit load is always aligned to natural alignment
ldarb(TMP4.W(), TMP3);
stlrb(TMP4.W(), TMP2);
} else {
nop();
switch (OpSize) {
case 2: ldarh(TMP4.W(), TMP3); break;
case 4: ldar(TMP4.W(), TMP3); break;
case 8: ldar(TMP4, TMP3); break;
default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
}
nop();

// Placeholders for backpatching barriers (one per load/store)
nop();
nop();

switch (OpSize) {
case 2: stlrh(TMP4.W(), TMP2); break;
case 4: stlr(TMP4.W(), TMP2); break;
case 8: stlr(TMP4, TMP2); break;
default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
}
nop();
}
}

@@ -2191,13 +2228,15 @@ DEF_OP(ParanoidLoadMemTSO) {
const auto Op = IROp->C<IR::IROp_LoadMemTSO>();
const auto OpSize = IROp->Size;

const auto MemReg = GetReg(Op->Addr.ID());
auto MemReg = GetReg(Op->Addr.ID());

if (CTX->HostFeatures.SupportsTSOImm9 && Op->Class == FEXCore::IR::GPRClass) {
const auto Dst = GetReg(Node);
uint64_t Offset = 0;
if (!Op->Offset.IsInvalid()) {
(void)IsInlineConstant(Op->Offset, &Offset);
if (!IsInlineConstant(Op->Offset, &Offset)) {
MemReg = ApplyMemOperand(OpSize, MemReg, TMP4, Op->Offset, Op->OffsetType, Op->OffsetScale);
}
}

if (OpSize == IR::OpSize::i8Bit) {
@@ -2214,6 +2253,7 @@
}
} else if (CTX->HostFeatures.SupportsRCPC && Op->Class == FEXCore::IR::GPRClass) {
const auto Dst = GetReg(Node);
MemReg = ApplyMemOperand(OpSize, MemReg, TMP4, Op->Offset, Op->OffsetType, Op->OffsetScale);
if (OpSize == IR::OpSize::i8Bit) {
// 8bit load is always aligned to natural alignment
ldaprb(Dst.W(), MemReg);
@@ -2227,6 +2267,7 @@
}
} else if (Op->Class == FEXCore::IR::GPRClass) {
const auto Dst = GetReg(Node);
MemReg = ApplyMemOperand(OpSize, MemReg, TMP4, Op->Offset, Op->OffsetType, Op->OffsetScale);
switch (OpSize) {
case IR::OpSize::i8Bit: ldarb(Dst, MemReg); break;
case IR::OpSize::i16Bit: ldarh(Dst, MemReg); break;
@@ -2236,6 +2277,7 @@
}
} else {
const auto Dst = GetVReg(Node);
MemReg = ApplyMemOperand(OpSize, MemReg, TMP4, Op->Offset, Op->OffsetType, Op->OffsetScale);
switch (OpSize) {
case IR::OpSize::i8Bit:
ldarb(TMP1, MemReg);
@@ -2274,13 +2316,15 @@ DEF_OP(ParanoidStoreMemTSO) {
const auto Op = IROp->C<IR::IROp_StoreMemTSO>();
const auto OpSize = IROp->Size;

const auto MemReg = GetReg(Op->Addr.ID());
auto MemReg = GetReg(Op->Addr.ID());

if (CTX->HostFeatures.SupportsTSOImm9 && Op->Class == FEXCore::IR::GPRClass) {
const auto Src = GetReg(Op->Value.ID());
uint64_t Offset = 0;
if (!Op->Offset.IsInvalid()) {
(void)IsInlineConstant(Op->Offset, &Offset);
if (!IsInlineConstant(Op->Offset, &Offset)) {
MemReg = ApplyMemOperand(OpSize, MemReg, TMP1, Op->Offset, Op->OffsetType, Op->OffsetScale);
}
}

if (OpSize == IR::OpSize::i8Bit) {
@@ -2296,6 +2340,7 @@
}
} else if (Op->Class == FEXCore::IR::GPRClass) {
const auto Src = GetReg(Op->Value.ID());
MemReg = ApplyMemOperand(OpSize, MemReg, TMP1, Op->Offset, Op->OffsetType, Op->OffsetScale);
switch (OpSize) {
case IR::OpSize::i8Bit: stlrb(Src, MemReg); break;
case IR::OpSize::i16Bit: stlrh(Src, MemReg); break;
@@ -2306,6 +2351,8 @@
} else {
const auto Src = GetVReg(Op->Value.ID());

MemReg = ApplyMemOperand(OpSize, MemReg, TMP4, Op->Offset, Op->OffsetType, Op->OffsetScale);

switch (OpSize) {
case IR::OpSize::i8Bit:
umov<ARMEmitter::SubRegSize::i8Bit>(TMP1, Src, 0);
