diff options
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXPeephole.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 6 |
3 files changed, 22 insertions, 11 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp index bdb463f9de5..a61c291d233 100644 --- a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp @@ -22,7 +22,7 @@ // This peephole pass optimizes these cases, for example // // It will transform the following pattern -// %vreg0<def> = LEA_ADDRi64 <fi#0>, 4 +// %vreg0<def> = LEA_ADDRi64 %VRFrame, 4 // %vreg1<def> = cvta_to_local_yes_64 %vreg0 // // into @@ -36,7 +36,6 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -96,7 +95,7 @@ static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { // Check the LEA_ADDRi operand is Frame index auto &BaseAddrOp = GenericAddrDef->getOperand(1); - if (BaseAddrOp.getType() == MachineOperand::MO_FrameIndex) { + if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) { return true; } @@ -110,16 +109,11 @@ static void CombineCVTAToLocal(MachineInstr &Root) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); - // Get the correct offset - int FrameIndex = Prev.getOperand(1).getIndex(); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - Prev.getOperand(2).getImm(); - MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()), Root.getOperand(0).getReg()) .addReg(NVPTX::VRFrameLocal) - .addOperand(MachineOperand::CreateImm(Offset)); + .addOperand(Prev.getOperand(2)); MBB.insert((MachineBasicBlock::iterator)&Root, MIB); @@ -145,6 +139,15 @@ bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { } } // Instruction } // Basic Block + + // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal + const auto &MRI = MF.getRegInfo(); + if 
(MRI.use_empty(NVPTX::VRFrame)) { + if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) { + MI->eraseFromParentAndMarkDBGValuesForRemoval(); + } + } + return Changed; } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index dc3e34f9647..9d9072efc38 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -205,13 +205,15 @@ bool NVPTXPassConfig::addInstSelector() { if (!ST.hasImageHandles()) addPass(createNVPTXReplaceImageHandlesPass()); - addPass(createNVPTXPeephole()); - return false; } void NVPTXPassConfig::addPostRegAlloc() { addPass(createNVPTXPrologEpilogPass(), false); + // NVPTXPrologEpilogPass calculates frame object offsets and replaces frame + // indices with the VRFrame register. NVPTXPeephole needs to run after that and + // will replace VRFrame with VRFrameLocal when possible. + addPass(createNVPTXPeephole()); } FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll index fba5dd883f9..ef1b7da6ad0 100644 --- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll +++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll @@ -59,10 +59,16 @@ define void @foo3(i32 %a) { ; PTX32: cvta.local.u32 %SP, %SPL; ; PTX32: add.u32 {{%r[0-9]+}}, %SP, 0; +; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 0; +; PTX32: add.u32 {{%r[0-9]+}}, %SP, 4; +; PTX32: add.u32 {{%r[0-9]+}}, %SPL, 4; ; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}} ; PTX32: st.local.u32 [{{%r[0-9]+}}], {{%r[0-9]+}} ; PTX64: cvta.local.u64 %SP, %SPL; ; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 0; +; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 0; +; PTX64: add.u64 {{%rd[0-9]+}}, %SP, 4; +; PTX64: add.u64 {{%rd[0-9]+}}, %SPL, 4; ; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}} ; PTX64: st.local.u32 [{{%rd[0-9]+}}], {{%r[0-9]+}} define void @foo4() { |