From b810f283b53586c9489d4e027420fe2270d0df9c Mon Sep 17 00:00:00 2001 From: Nikola Peric Date: Thu, 3 Nov 2022 16:37:33 +0100 Subject: [PATCH 1/4] Enable save/restore generation when there is a gap between registers Enable save/restore generation when there is a gap between registers by inserting the missing ones at the moment when spill slots for callee-saved registers are assigned. --- llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 43 +++++++++++++++---- .../nanomips/saverestore_with_register_gap.ll | 43 +++++++++++++++++++ 2 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/nanomips/saverestore_with_register_gap.ll diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 56e27f7e71e94..cc8bf9fcc66e0 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -935,22 +935,47 @@ bool MipsSEFrameLowering::assignCalleeSavedSpillSlots( if (!STI.hasNanoMips()) return false; + static const std::unordered_map Regs = { + {Mips::GP_NM, 0}, {Mips::FP_NM, 1}, {Mips::RA_NM, 2}, {Mips::S0_NM, 3}, + {Mips::S1_NM, 4}, {Mips::S2_NM, 5}, {Mips::S3_NM, 6}, {Mips::S4_NM, 7}, + {Mips::S5_NM, 8}, {Mips::S6_NM, 9}, {Mips::S7_NM, 10}, + }; + + static const std::unordered_map CSNumToReg = { + {0, Mips::GP_NM}, {1, Mips::FP_NM}, {2, Mips::RA_NM}, {3, Mips::S0_NM}, + {4, Mips::S1_NM}, {5, Mips::S2_NM}, {6, Mips::S3_NM}, {7, Mips::S4_NM}, + {8, Mips::S5_NM}, {9, Mips::S6_NM}, {10, Mips::S7_NM}, + }; + // nanoMIPS save and restore instructions require callee-saved registers to be // saved in particular order on the stack. 
- auto SortCalleeSaves = [](CalleeSavedInfo First, CalleeSavedInfo Second) { - std::unordered_map Regs{ - {Mips::GP_NM, 0}, {Mips::FP_NM, 1}, {Mips::RA_NM, 2}, {Mips::S0_NM, 3}, - {Mips::S1_NM, 4}, {Mips::S2_NM, 5}, {Mips::S3_NM, 6}, {Mips::S4_NM, 7}, - {Mips::S5_NM, 8}, {Mips::S6_NM, 9}, {Mips::S7_NM, 10}, - }; - + auto CompareCalleeSaves = [](CalleeSavedInfo First, CalleeSavedInfo Second) { // There should be no callee-saved registers that are not part of the list. assert(Regs.find(First.getReg()) != Regs.end() && Regs.find(Second.getReg()) != Regs.end()); - return Regs[First.getReg()] < Regs[Second.getReg()]; + return Regs.at(First.getReg()) < Regs.at(Second.getReg()); }; - std::sort(CSI.begin(), CSI.end(), SortCalleeSaves); + + // If CSI list has less than two callee-saved registers we can + // return from method since no insertions nor sorting is needed + if(CSI.size() < 2) + return false; + + SmallBitVector CSNumBitVector(11); + for (CalleeSavedInfo &CS : CSI) + CSNumBitVector.set(Regs.at(CS.getReg())); + + int MinCSNum = CSNumBitVector.find_first(); + int MaxCSNum = CSNumBitVector.find_last(); + + // Inserting all of the missing callee-saved registers between min and max + // in order to allow further load-store optimizations + for (int i = MinCSNum + 1; i < MaxCSNum; ++i) + if (!CSNumBitVector.test(i)) + CSI.push_back(CalleeSavedInfo(CSNumToReg.at(i))); + + std::sort(CSI.begin(), CSI.end(), CompareCalleeSaves); return false; } diff --git a/llvm/test/CodeGen/Mips/nanomips/saverestore_with_register_gap.ll b/llvm/test/CodeGen/Mips/nanomips/saverestore_with_register_gap.ll new file mode 100644 index 0000000000000..01bc748e918dd --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/saverestore_with_register_gap.ll @@ -0,0 +1,43 @@ +; RUN: llc -mtriple=nanomips -asm-show-inst -verify-machineinstrs < %s | FileCheck %s + +; Make sure that SAVE/RESTORE instructions are working even though there +; is a gap in the callee-saved register sequence. 
+define void @test1() { +; CHECK: save 32, $s0, $s1, $s2, $s3, $s4 + call void asm sideeffect "", "~{$16},~{$18},~{$20},~{$1}"() ret void +; CHECK: restore.jrc 32, $s0, $s1, $s2, $s3, $s4 +} + +define void @test2() { +; CHECK: save 48, $fp, $ra, $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7 + call void asm sideeffect "", "~{$16},~{$23},~{$30},~{$1}"() ret void +; CHECK: restore.jrc 48, $fp, $ra, $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7 +} + +; Make sure to generate correct SAVE/RESTORE sp offset in case there was a gap +; in callee-saved register sequence and there are more things to be stored on +; stack after storing values from callee-saved registers. For example values +; from ax registers, used to pass function arguments, can be stored on stack +; after values from callee-saved registers. +define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) { + + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %d.addr = alloca i32, align 4 + %e.addr = alloca i32, align 4 + %f.addr = alloca i32, align 4 + %g.addr = alloca i32, align 4 + %h.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + store i32 %d, i32* %d.addr, align 4 + store i32 %e, i32* %e.addr, align 4 + store i32 %f, i32* %f.addr, align 4 + store i32 %g, i32* %g.addr, align 4 + store i32 %h, i32* %h.addr, align 4 + ; CHECK: save 64, $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7 + call void asm sideeffect "", "~{$16},~{$23},~{$1}"() ret void + ; CHECK: restore.jrc 64, $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7 +} \ No newline at end of file From 9a56b55b32f92d0983b9200101f01c1371edf3d3 Mon Sep 17 00:00:00 2001 From: Nikola Peric Date: Thu, 9 Feb 2023 17:55:04 +0100 Subject: [PATCH 2/4] NanoMips: Two-step stack setup Adjust stack pointer in two steps if offset is larger than 4096. In the first step adjust stack pointer for the size necessary to spill CSR onto the stack. 
In the second step adjust stack pointer for the size necessary to spill local objects. --- llvm/lib/Target/Mips/MipsMachineFunction.cpp | 16 +++ llvm/lib/Target/Mips/MipsMachineFunction.h | 4 + llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 131 ++++++++++++++---- llvm/lib/Target/Mips/MipsSERegisterInfo.cpp | 13 +- .../test/CodeGen/Mips/nanomips/saverestore.ll | 11 -- .../Mips/nanomips/two-step-stack-setup.ll | 36 +++++ 6 files changed, 174 insertions(+), 37 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/nanomips/two-step-stack-setup.ll diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/llvm/lib/Target/Mips/MipsMachineFunction.cpp index a7a2be30f58a4..f89d3f569c25c 100644 --- a/llvm/lib/Target/Mips/MipsMachineFunction.cpp +++ b/llvm/lib/Target/Mips/MipsMachineFunction.cpp @@ -201,3 +201,19 @@ int MipsFunctionInfo::getMoveF64ViaSpillFI(MachineFunction &MF, } void MipsFunctionInfo::anchor() {} + +unsigned MipsFunctionInfo::getCalleeSavedStackSize() { + return CalleeSavedStackSize; +} + +void MipsFunctionInfo::setCalleeSavedStackSize(unsigned Size) { + CalleeSavedStackSize = Size; +} + +bool MipsFunctionInfo::isTwoStepStackSetup(MachineFunction &MF) { + + const MipsSubtarget &STI = + *static_cast(&MF.getSubtarget()); + + return (MF.getFrameInfo().getStackSize() > 4096) && STI.hasNanoMips(); +} diff --git a/llvm/lib/Target/Mips/MipsMachineFunction.h b/llvm/lib/Target/Mips/MipsMachineFunction.h index 64179ed45d36f..1a8d25458dd0b 100644 --- a/llvm/lib/Target/Mips/MipsMachineFunction.h +++ b/llvm/lib/Target/Mips/MipsMachineFunction.h @@ -108,6 +108,9 @@ class MipsFunctionInfo : public MachineFunctionInfo { JumpTableEntryInfo[Idx]->Signed = Sign; } } + unsigned getCalleeSavedStackSize(); + void setCalleeSavedStackSize(unsigned Size); + bool isTwoStepStackSetup(MachineFunction &MF); private: virtual void anchor(); @@ -168,6 +171,7 @@ class MipsFunctionInfo : public MachineFunctionInfo { }; SmallVector JumpTableEntryInfo; + unsigned CalleeSavedStackSize = 0; 
}; } // end namespace llvm diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index cc8bf9fcc66e0..f079bd802c2bd 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -432,19 +432,61 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - // Adjust stack. - TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); + const std::vector &CSI = MFI.getCalleeSavedInfo(); + + int64_t CalleeSavedStackSize; + int64_t LoaclStackSize; + // If we have two-step stack setup MBBI_2 will point to the + // first instruction after calle-saves store sequence + MachineBasicBlock::iterator MBBI_2 = MBBI; + + if (MipsFI->isTwoStepStackSetup(MF)) { + + CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize(); + unsigned NumOfCSI = MFI.getCalleeSavedInfo().size(); + + // Move MBBI_2 to point to the first instruction after + // calle-saves store sequence. That's the place for the second + // steck pointer adjustment. + std::advance(MBBI_2, NumOfCSI); + + // The first stack pointer adjustment to cover space needed + // to spill callee-saved registers on stack. + TII.adjustStackPtr(SP, -CalleeSavedStackSize, MBB, MBBI); + + LoaclStackSize = StackSize - CalleeSavedStackSize; - // emit ".cfi_def_cfa_offset StackSize" - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + // The second stack pointer adjustment to cover space needed + // to spill local objects on stack. + TII.adjustStackPtr(SP, -LoaclStackSize, MBB, MBBI_2); + + } else + // Adjust stack. 
+ TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); + + if (MipsFI->isTwoStepStackSetup(MF)) { + // emit ".cfi_def_cfa_offset CalleeSavedStackSize" + // emit ".cfi_def_cfa_offset StackSize = CalleeSavedStackSize + + // LoaclStackSize" + unsigned CFIIndex_1 = MF.addFrameInst( + MCCFIInstruction::cfiDefCfaOffset(nullptr, CalleeSavedStackSize)); + unsigned CFIIndex_2 = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex_1); + BuildMI(MBB, MBBI_2, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex_2); + } else { + // emit ".cfi_def_cfa_offset StackSize" + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } if (MF.getFunction().hasFnAttribute("interrupt")) emitInterruptPrologueStub(MF, MBB); - const std::vector &CSI = MFI.getCalleeSavedInfo(); if (!CSI.empty()) { // Find the instruction past the last instruction that saves a callee-saved @@ -531,6 +573,14 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP).addReg(SP).addReg(ZERO) .setMIFlag(MachineInstr::FrameSetup); + if (MipsFI->isTwoStepStackSetup(MF)) + // If we have two-step stack setup insert instruction "move $fp, $sp" + // after the second stack setup also + BuildMI(MBB, MBBI_2, dl, TII.get(MOVE), FP) + .addReg(SP) + .addReg(ZERO) + .setMIFlag(MachineInstr::FrameSetup); + // emit ".cfi_def_cfa_register $fp" unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FP, true))); @@ -747,8 +797,26 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, if (!StackSize) return; - // Adjust stack. 
- TII.adjustStackPtr(SP, StackSize, MBB, MBBI); + if (MipsFI->isTwoStepStackSetup(MF)) { + + int64_t CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize(); + int64_t LoaclStackSize = StackSize - CalleeSavedStackSize; + + int64_t NumOfCSI = MFI.getCalleeSavedInfo().size(); + + MachineBasicBlock::iterator MBBI_2 = MBBI; + // Move MBBI_2 to point to the first instruction in + // calle-saved load sequence. That's the place where we + // need to undo the second stack adjustment + std::advance(MBBI_2, (-1) * NumOfCSI); + + // Undo the second stack pointer adjustment + TII.adjustStackPtr(SP, LoaclStackSize, MBB, MBBI_2); + // Undo the first stack pointer adjustment + TII.adjustStackPtr(SP, CalleeSavedStackSize, MBB, MBBI); + } else + // Adjust stack. + TII.adjustStackPtr(SP, StackSize, MBB, MBBI); } void MipsSEFrameLowering::emitInterruptEpilogueStub( @@ -957,25 +1025,38 @@ bool MipsSEFrameLowering::assignCalleeSavedSpillSlots( return Regs.at(First.getReg()) < Regs.at(Second.getReg()); }; - // If CSI list has less than two callee-saved registers we can - // return from method since no insertions nor sorting is needed - if(CSI.size() < 2) - return false; + // If CSI list has less than two callee-saved registers + // no insertions nor sorting is needed + if (CSI.size() >= 2) { + + SmallBitVector CSNumBitVector(11); + for (CalleeSavedInfo &CS : CSI) + CSNumBitVector.set(Regs.at(CS.getReg())); - SmallBitVector CSNumBitVector(11); - for (CalleeSavedInfo &CS : CSI) - CSNumBitVector.set(Regs.at(CS.getReg())); + int MinCSNum = CSNumBitVector.find_first(); + int MaxCSNum = CSNumBitVector.find_last(); - int MinCSNum = CSNumBitVector.find_first(); - int MaxCSNum = CSNumBitVector.find_last(); + // Inserting all of the missing callee-saved registers between min and max + // in order to allow further load-store optimizations + for (int i = MinCSNum + 1; i < MaxCSNum; ++i) + if (!CSNumBitVector.test(i)) + CSI.push_back(CalleeSavedInfo(CSNumToReg.at(i))); + + std::sort(CSI.begin(), 
CSI.end(), CompareCalleeSaves); + } - // Inserting all of the missing callee-saved registers between min and max - // in order to allow further load-store optimizations - for (int i = MinCSNum + 1; i < MaxCSNum; ++i) - if (!CSNumBitVector.test(i)) - CSI.push_back(CalleeSavedInfo(CSNumToReg.at(i))); + MipsFunctionInfo *MipsFI = MF.getInfo(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + unsigned CalleeSavedOffsetSize = 0; + for (CalleeSavedInfo &CS : CSI) { + Register Reg = CS.getReg(); + auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8; + CalleeSavedOffsetSize += RegSize; + } + uint64_t AlignedCSStackSize = alignTo(CalleeSavedOffsetSize, 16); + MipsFI->setCalleeSavedStackSize(AlignedCSStackSize); - std::sort(CSI.begin(), CSI.end(), CompareCalleeSaves); return false; } diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp index b8ea753c3ed8e..b0fdfac56fc16 100644 --- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -220,7 +220,18 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, bool IsKill = false; int64_t Offset; - Offset = SPOffset + (int64_t)StackSize; + if (MipsFI->isTwoStepStackSetup(MF)) { + + int64_t CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize(); + + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) + Offset = SPOffset + (int64_t)CalleeSavedStackSize; + else + Offset = SPOffset + StackSize; + + } else + Offset = SPOffset + (int64_t)StackSize; + Offset += MI.getOperand(OpNo + 1).getImm(); LLVM_DEBUG(errs() << "Offset : " << Offset << "\n" diff --git a/llvm/test/CodeGen/Mips/nanomips/saverestore.ll b/llvm/test/CodeGen/Mips/nanomips/saverestore.ll index 3d2e4ef32e3fe..e9c29cd8012d3 100644 --- a/llvm/test/CodeGen/Mips/nanomips/saverestore.ll +++ b/llvm/test/CodeGen/Mips/nanomips/saverestore.ll @@ -7,17 +7,6 @@ define void @test() { ; CHECK: restore.jrc 32, $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7 } -; Make sure that 
SAVE/RESTORE instructions are not used when the offset is larger than 4092. -define void @test2() { -; CHECK-NOT: save - %foo = alloca [4096 x i8], align 1 - %1 = getelementptr inbounds [4096 x i8], [4096 x i8]* %foo, i32 0, i32 0 - call void asm sideeffect "", "r,~{$16},~{$17},~{$18},~{$19},~{$20},~{$21},~{$23},~{$22},~{$1}"(i8* %1) - ret void -; CHECK-NOT: restore.jrc -; CHECK-NOT: restore -} - ; Make sure that SAVE/SAVE combination is used when incoming arguments need to ; be stored on the stack. First SAVE to move the stack pointer to where s-regs ; need to be stored and second SAVE to actually save the registers. Same logic diff --git a/llvm/test/CodeGen/Mips/nanomips/two-step-stack-setup.ll b/llvm/test/CodeGen/Mips/nanomips/two-step-stack-setup.ll new file mode 100644 index 0000000000000..4acc1dcbe3b0e --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/two-step-stack-setup.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=nanomips -asm-show-inst -verify-machineinstrs < %s | FileCheck %s + +; Check if stack setup is split into two-step setup when sp +; offset is larger than 4096 +define void @test1() { + ; CHECK: save 32, $s0, $s1, $s2, $s3, $s4 + ; CHECK: addiu $sp, $sp, -4096 + %foo = alloca [4096 x i8], align 1 + %1 = getelementptr inbounds [4096 x i8], [4096 x i8]* %foo, i32 0, i32 0 + call void asm sideeffect "", "r,~{$16},~{$17},~{$18},~{$19},~{$20},~{$1}"(i8* %1) + ret void + ; CHECK: addiu $sp, $sp, 4096 + ; CHECK: restore.jrc 32, $s0, $s1, $s2, $s3, $s4 +} + +; Check if there are two instructions for storing sp in fp +; if function uses fp and two-step stack setup is present +define void @test2() #0 { + ; CHECK: save 32, $fp, $ra, $s0, $s1, $s2, $s3, $s4 + ; CHECK: or $fp, $sp, $zero + ; CHECK: addiu $sp, $sp, -4096 + ; CHECK: or $fp, $sp, $zero + %foo = alloca [4096 x i8], align 1 + %1 = getelementptr inbounds [4096 x i8], [4096 x i8]* %foo, i32 0, i32 0 + call void asm sideeffect "", "r,~{$16},~{$17},~{$18},~{$19},~{$20},~{$1}"(i8* %1) + ret void + ; 
CHECK: addiu $sp, $sp, 4096 + ; CHECK: restore.jrc 32, $fp, $ra, $s0, $s1, $s2, $s3, $s4 +} + +attributes #0 = { "frame-pointer"="all"} + +!llvm.module.flags = !{!0} + +!0 = !{i32 7, !"frame-pointer", i32 2} + From fc5fa13bb204b968c8d5a471d7e67daec34c34ae Mon Sep 17 00:00:00 2001 From: Nikola Peric Date: Thu, 23 Feb 2023 17:11:30 +0100 Subject: [PATCH 3/4] NanoMips: Frame pointer setup If function uses FP, it will now point to address -4096 from the beginning of function's stack. After FP setup following offsets will be relative to SP if function has no var-sized objects. If it has var-sized objects offsets will be relative to FP. Also, stack realignment now happens with INS instruction. --- llvm/lib/Target/Mips/MipsRegisterInfo.cpp | 10 ++ llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 57 ++++++--- llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 7 +- llvm/lib/Target/Mips/MipsSERegisterInfo.cpp | 29 ++++- .../CodeGen/Mips/nanomips/frame_pointer.ll | 121 ++++++++++++++++++ .../Mips/nanomips/stack_realignment.ll | 13 ++ .../Mips/nanomips/two-step-stack-setup.ll | 22 ---- 7 files changed, 210 insertions(+), 49 deletions(-) create mode 100644 llvm/test/CodeGen/Mips/nanomips/frame_pointer.ll create mode 100644 llvm/test/CodeGen/Mips/nanomips/stack_realignment.ll diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp index b4fcb7da9bee2..e022861c43678 100644 --- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp @@ -303,11 +303,21 @@ Register MipsRegisterInfo:: getFrameRegister(const MachineFunction &MF) const { const MipsSubtarget &Subtarget = MF.getSubtarget(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); bool IsN64 = static_cast(MF.getTarget()).getABI().IsN64(); bool IsP32 = static_cast(MF.getTarget()).getABI().IsP32(); + // If function doesn't have 
var-sized objects and function doesn't need stack + // realignment but frame pointer elimination is disabled we want offsets to be + // relative to sp instead of fp + if (Subtarget.hasNanoMips()) + if (!MFI.hasVarSizedObjects() && !TRI->hasStackRealignment(MF) && + MF.getTarget().Options.DisableFramePointerElim(MF)) + return Mips::SP_NM; + if (Subtarget.inMips16Mode()) return TFI->hasFP(MF) ? Mips::S0 : Mips::SP; else diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index f079bd802c2bd..118849323ec6d 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -569,23 +569,40 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { - // Insert instruction "move $fp, $sp" at this location. - BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP).addReg(SP).addReg(ZERO) - .setMIFlag(MachineInstr::FrameSetup); - if (MipsFI->isTwoStepStackSetup(MF)) - // If we have two-step stack setup insert instruction "move $fp, $sp" - // after the second stack setup also - BuildMI(MBB, MBBI_2, dl, TII.get(MOVE), FP) + if (STI.hasNanoMips()) { + + BuildMI(MBB, MBBI_2, dl, TII.get(ADDiu), FP) + .addReg(SP) + .addImm(-4096 + StackSize); + + // emit ".cfi_def_cfa_register $fp" + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FP, true))); + BuildMI(MBB, MBBI_2, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + + // emit ".cfi_def_cfa_offset 4096" + unsigned CFIIndex_1 = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 4096)); + BuildMI(MBB, MBBI_2, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex_1); + + } else { + // Insert instruction "move $fp, $sp" at this location. 
+ BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP) .addReg(SP) .addReg(ZERO) .setMIFlag(MachineInstr::FrameSetup); - // emit ".cfi_def_cfa_register $fp" - unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( - nullptr, MRI->getDwarfRegNum(FP, true))); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + // emit ".cfi_def_cfa_register $fp" + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FP, true))); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } if (RegInfo.hasStackRealignment(MF)) { // addiu $Reg, $zero, -MaxAlignment @@ -595,13 +612,19 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, "Function's alignment size requirement is not supported."); int64_t MaxAlign = -(int64_t)MFI.getMaxAlign().value(); - if (ABI.IsP32()) - BuildMI(MBB, MBBI, dl, TII.get(Mips::Li_NM), VR).addImm(MaxAlign); - else + if (ABI.IsP32()) { + uint64_t MaxAlignment = MFI.getMaxAlign().value(); + BuildMI(MBB, MBBI, dl, TII.get(Mips::INS_NM), SP) + .addReg(ZERO) + .addImm(0) + .addImm(Log2_64(MaxAlignment)) + .addReg(SP); + } else { BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR) .addReg(ZERO) .addImm(MaxAlign); - BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); + BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR); + } if (hasBP(MF)) { // move $s7, $sp @@ -759,7 +782,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, unsigned MOVE = ABI.GetGPRMoveOp(); // if framepointer enabled, restore the stack pointer. - if (hasFP(MF)) { + if (hasFP(MF) && !STI.hasNanoMips()) { // Find the first instruction that restores a callee-saved register. 
MachineBasicBlock::iterator I = MBBI; diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index eac9bec5f5f3b..b5e159471d748 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -461,8 +461,8 @@ bool MipsSEDAGToDAGISel::selectIntAddrLSL2MM(SDValue Addr, SDValue &Base, bool MipsSEDAGToDAGISel::selectIntAddrSImm9(SDValue Addr, SDValue &Base, SDValue &Offset) const { - return selectAddrFrameIndex(Addr, Base, Offset) || - selectAddrFrameIndexOffset(Addr, Base, Offset, 9); + return selectAddrFrameIndexOffset(Addr, Base, Offset, 9) && + !isa(Base); } bool MipsSEDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base, @@ -541,7 +541,8 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndexUOffset( bool MipsSEDAGToDAGISel::selectIntAddrUImm12(SDValue Addr, SDValue &Base, SDValue &Offset) const { - return selectAddrFrameIndexUOffset(Addr, Base, Offset, 12, 0); + return selectAddrFrameIndex(Addr, Base, Offset) || + selectAddrFrameIndexUOffset(Addr, Base, Offset, 12, 0); } // A load/store 'x' indexed (reg + reg) diff --git a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp index b0fdfac56fc16..e0b498c6b40bc 100644 --- a/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -175,6 +175,8 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, static_cast(MF.getTarget()).getABI(); const MipsRegisterInfo *RegInfo = static_cast(MF.getSubtarget().getRegisterInfo()); + const MipsSubtarget &STI = + *static_cast(&MF.getSubtarget()); const std::vector &CSI = MFI.getCalleeSavedInfo(); int MinCSFI = 0; @@ -220,14 +222,25 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, bool IsKill = false; int64_t Offset; - if (MipsFI->isTwoStepStackSetup(MF)) { + if (STI.hasNanoMips()) { - int64_t CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize(); + if 
(MipsFI->isTwoStepStackSetup(MF)) { - if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) - Offset = SPOffset + (int64_t)CalleeSavedStackSize; - else - Offset = SPOffset + StackSize; + int64_t CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize(); + if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) + Offset = SPOffset + (int64_t)CalleeSavedStackSize; + else if (FrameReg == Mips::FP_NM) + Offset = SPOffset + 4096; + else + Offset = SPOffset + StackSize; + + } else { + + if (FrameReg == Mips::FP_NM) + Offset = SPOffset + 4096; + else + Offset = SPOffset + StackSize; + } } else Offset = SPOffset + (int64_t)StackSize; @@ -258,7 +271,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, // TODO: This doesn't work well for nanoMIPS, because it has unsigned // offsets and this check assumes signed. if (OffsetBitSize < 16 && isInt<16>(Offset) && - (!isIntN(OffsetBitSize, Offset) || !isAligned(OffsetAlign, Offset))) { + (STI.hasNanoMips() ? !isUIntN(OffsetBitSize, Offset) + : !isIntN(OffsetBitSize, Offset) || + !isAligned(OffsetAlign, Offset))) { // If we have an offset that needs to fit into a signed n-bit immediate // (where n < 16) and doesn't, but does fit into 16-bits then use an ADDiu MachineBasicBlock &MBB = *MI.getParent(); diff --git a/llvm/test/CodeGen/Mips/nanomips/frame_pointer.ll b/llvm/test/CodeGen/Mips/nanomips/frame_pointer.ll new file mode 100644 index 0000000000000..6ae4a9e9eb265 --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/frame_pointer.ll @@ -0,0 +1,121 @@ +; RUN: llc -mtriple=nanomips -asm-show-inst -verify-machineinstrs < %s | FileCheck %s + +; Check if fp is set correctly if function wants to use it. +; We want it to point to -4096 from the beginning of the stack. 
+define void @test1(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) #0 { +entry: + ; CHECK: save 64, $fp, $ra, $s0, $s1, $s2, $s3, $s4 + ; CHECK: addiu $fp, $sp, -4032 + ; CHECK: sw $a0, 32($sp) + ; CHECK: restore.jrc 64, $fp, $ra, $s0, $s1, $s2, $s3, $s4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %d.addr = alloca i32, align 4 + %e.addr = alloca i32, align 4 + %f.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + store i32 %d, i32* %d.addr, align 4 + store i32 %e, i32* %e.addr, align 4 + store i32 %f, i32* %f.addr, align 4 + call void asm sideeffect "", "~{$16},~{$17},~{$18},~{$19},~{$20},~{$1}"() + + ret void +} + +; Check if offsets after fp setup are relative to fp if variable-sized +; objects are present in function. +declare void @callee2(i8*) +define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) #0 { +entry: + ; CHECK: save 64, $fp, $ra, $s0, $s1, $s2, $s3, $s4 + ; CHECK: addiu $fp, $sp, -4032 + ; CHECK: sw $a0, 4064($fp) + ; CHECK: restore.jrc 64, $fp, $ra, $s0, $s1, $s2, $s3, $s4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %d.addr = alloca i32, align 4 + %e.addr = alloca i32, align 4 + %f.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + store i32 %d, i32* %d.addr, align 4 + store i32 %e, i32* %e.addr, align 4 + store i32 %f, i32* %f.addr, align 4 + + %0 = alloca i8, i32 %a + call void @callee2(i8* %0) + + call void asm sideeffect "", "~{$16},~{$17},~{$18},~{$19},~{$20},~{$1}"() + + ret void +} + +; Check if offsets after fp setup stay relative to sp if +; function needs stack realignment +declare void @callee3(i32*) +define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) #0 { +entry: + ; CHECK: save 64, $fp, $ra, $s0, $s1, $s2, $s3, $s4 + ; 
CHECK: addiu $fp, $sp, -4032 + ; CHECK: sw $a0, 32($sp) + ; CHECK: restore.jrc 64, $fp, $ra, $s0, $s1, $s2, $s3, $s4 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %d.addr = alloca i32, align 4 + %e.addr = alloca i32, align 4 + %f.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + store i32 %d, i32* %d.addr, align 4 + store i32 %e, i32* %e.addr, align 4 + store i32 %f, i32* %f.addr, align 4 + + %0 = alloca i32, align 64 + call void @callee3(i32 *%0) + + call void asm sideeffect "", "~{$16},~{$17},~{$18},~{$19},~{$20},~{$1}"() + + ret void +} + +; Check if offsets after fp setup are relative to BasePtr if variable-sized +; objects are present in function and function needs stack realignment +declare void @callee4(i8*, i32*) +define void @test4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) #0 { +entry: + ; CHECK: save 192, $fp, $ra, $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7 + ; CHECK: addiu $fp, $sp, -3904 + ; CHECK: sw $a0, 148($s7) + ; CHECK: restore.jrc 192, $fp, $ra, $s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7 + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %d.addr = alloca i32, align 4 + %e.addr = alloca i32, align 4 + %f.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + store i32 %d, i32* %d.addr, align 4 + store i32 %e, i32* %e.addr, align 4 + store i32 %f, i32* %f.addr, align 4 + + %0 = alloca i8, i32 %a + %1 = alloca i32, align 64 + call void @callee4(i8* %0, i32 *%1) + + call void asm sideeffect "", "~{$16},~{$17},~{$18},~{$19},~{$20},~{$1}"() + + ret void +} + +attributes #0 = {"frame-pointer"="all"} +!llvm.module.flags = !{!0} +!0 = !{i32 7, !"frame-pointer", i32 2} diff --git a/llvm/test/CodeGen/Mips/nanomips/stack_realignment.ll 
b/llvm/test/CodeGen/Mips/nanomips/stack_realignment.ll new file mode 100644 index 0000000000000..30acdf93c258e --- /dev/null +++ b/llvm/test/CodeGen/Mips/nanomips/stack_realignment.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=nanomips -asm-show-inst -verify-machineinstrs < %s | FileCheck %s + +; Check if stack realignment is done using INS instruction +; if function needs it. +declare void @callee(i32*) +define void @test() { +entry: + ; CHECK: ins $sp, $zero, 0, 6 + %0 = alloca i32, align 64 + call void @callee(i32 *%0) + ret void +} + diff --git a/llvm/test/CodeGen/Mips/nanomips/two-step-stack-setup.ll b/llvm/test/CodeGen/Mips/nanomips/two-step-stack-setup.ll index 4acc1dcbe3b0e..4145fc02c96ba 100644 --- a/llvm/test/CodeGen/Mips/nanomips/two-step-stack-setup.ll +++ b/llvm/test/CodeGen/Mips/nanomips/two-step-stack-setup.ll @@ -12,25 +12,3 @@ define void @test1() { ; CHECK: addiu $sp, $sp, 4096 ; CHECK: restore.jrc 32, $s0, $s1, $s2, $s3, $s4 } - -; Check if there are two instructions for storing sp in fp -; if function uses fp and two-step stack setup is present -define void @test2() #0 { - ; CHECK: save 32, $fp, $ra, $s0, $s1, $s2, $s3, $s4 - ; CHECK: or $fp, $sp, $zero - ; CHECK: addiu $sp, $sp, -4096 - ; CHECK: or $fp, $sp, $zero - %foo = alloca [4096 x i8], align 1 - %1 = getelementptr inbounds [4096 x i8], [4096 x i8]* %foo, i32 0, i32 0 - call void asm sideeffect "", "r,~{$16},~{$17},~{$18},~{$19},~{$20},~{$1}"(i8* %1) - ret void - ; CHECK: addiu $sp, $sp, 4096 - ; CHECK: restore.jrc 32, $fp, $ra, $s0, $s1, $s2, $s3, $s4 -} - -attributes #0 = { "frame-pointer"="all"} - -!llvm.module.flags = !{!0} - -!0 = !{i32 7, !"frame-pointer", i32 2} - From b905942f6fc606dbee1f7257b8ba617be1fb52ee Mon Sep 17 00:00:00 2001 From: Nikola Peric Date: Wed, 26 Jul 2023 17:35:45 +0200 Subject: [PATCH 4/4] Merged same if instructions --- llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 22 +++++++++----------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git 
a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp index 118849323ec6d..fdcf4b796af22 100644 --- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -435,7 +435,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, const std::vector &CSI = MFI.getCalleeSavedInfo(); int64_t CalleeSavedStackSize; - int64_t LoaclStackSize; + int64_t LocalStackSize; // If we have two-step stack setup MBBI_2 will point to the // first instruction after calle-saves store sequence MachineBasicBlock::iterator MBBI_2 = MBBI; @@ -447,27 +447,22 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, // Move MBBI_2 to point to the first instruction after // calle-saves store sequence. That's the place for the second - // steck pointer adjustment. + // stack pointer adjustment. std::advance(MBBI_2, NumOfCSI); // The first stack pointer adjustment to cover space needed // to spill callee-saved registers on stack. TII.adjustStackPtr(SP, -CalleeSavedStackSize, MBB, MBBI); - LoaclStackSize = StackSize - CalleeSavedStackSize; + LocalStackSize = StackSize - CalleeSavedStackSize; // The second stack pointer adjustment to cover space needed // to spill local objects on stack. - TII.adjustStackPtr(SP, -LoaclStackSize, MBB, MBBI_2); + TII.adjustStackPtr(SP, -LocalStackSize, MBB, MBBI_2); - } else - // Adjust stack. - TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); - - if (MipsFI->isTwoStepStackSetup(MF)) { // emit ".cfi_def_cfa_offset CalleeSavedStackSize" // emit ".cfi_def_cfa_offset StackSize = CalleeSavedStackSize + - // LoaclStackSize" + // LocalStackSize" unsigned CFIIndex_1 = MF.addFrameInst( MCCFIInstruction::cfiDefCfaOffset(nullptr, CalleeSavedStackSize)); unsigned CFIIndex_2 = @@ -477,6 +472,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI_2, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex_2); } else { + // Adjust stack. 
+ TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); + // emit ".cfi_def_cfa_offset StackSize" unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); @@ -823,7 +821,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, if (MipsFI->isTwoStepStackSetup(MF)) { int64_t CalleeSavedStackSize = MipsFI->getCalleeSavedStackSize(); - int64_t LoaclStackSize = StackSize - CalleeSavedStackSize; + int64_t LocalStackSize = StackSize - CalleeSavedStackSize; int64_t NumOfCSI = MFI.getCalleeSavedInfo().size(); @@ -834,7 +832,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, std::advance(MBBI_2, (-1) * NumOfCSI); // Undo the second stack pointer adjustment - TII.adjustStackPtr(SP, LoaclStackSize, MBB, MBBI_2); + TII.adjustStackPtr(SP, LocalStackSize, MBB, MBBI_2); // Undo the first stack pointer adjustment TII.adjustStackPtr(SP, CalleeSavedStackSize, MBB, MBBI); } else