diff options
| -rw-r--r-- | llvm/include/llvm/CodeGen/TargetInstrInfo.h | 23 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/PHIElimination.cpp | 28 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 37 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 49 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir | 54 | 
7 files changed, 176 insertions, 30 deletions
| diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 9384bbcca33..a01d851f510 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -22,6 +22,7 @@  #include "llvm/CodeGen/MachineCombinerPattern.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h"  #include "llvm/CodeGen/MachineLoopInfo.h"  #include "llvm/CodeGen/MachineOperand.h"  #include "llvm/CodeGen/MachineOutliner.h" @@ -1638,6 +1639,28 @@ public:      return false;    } +  /// During PHI elimination, lets the target make necessary checks and +  /// insert the copy to the PHI destination register in a target specific +  /// manner. +  virtual MachineInstr *createPHIDestinationCopy( +      MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, +      const DebugLoc &DL, Register Src, Register Dst) const { +    return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) +        .addReg(Src); +  } + +  /// During PHI elimination, lets the target make necessary checks and +  /// insert the copy from the PHI source register in a target specific +  /// manner. +  virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, +                                            MachineBasicBlock::iterator InsPt, +                                            const DebugLoc &DL, Register Src, +                                            Register SrcSubReg, +                                            Register Dst) const { +    return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst) +        .addReg(Src, 0, SrcSubReg); +  } +    /// Returns a \p outliner::OutlinedFunction struct containing target-specific    /// information for a set of outlining candidates.    
virtual outliner::OutlinedFunction getOutliningCandidateInfo( diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index 2b3ce890d98..8ffd62b0d32 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -31,7 +31,9 @@  #include "llvm/CodeGen/MachineRegisterInfo.h"  #include "llvm/CodeGen/SlotIndexes.h"  #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h"  #include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetPassConfig.h"  #include "llvm/CodeGen/TargetRegisterInfo.h"  #include "llvm/CodeGen/TargetSubtargetInfo.h"  #include "llvm/Pass.h" @@ -252,11 +254,12 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,    // Insert a register to register copy at the top of the current block (but    // after any remaining phi nodes) which copies the new incoming register    // into the phi node destination. +  MachineInstr *PHICopy = nullptr;    const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();    if (allPhiOperandsUndefined(*MPhi, *MRI))      // If all sources of a PHI node are implicit_def or undef uses, just emit an      // implicit_def instead of a copy. -    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), +    PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),              TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);    else {      // Can we reuse an earlier PHI node? 
This only happens for critical edges, @@ -273,15 +276,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,        const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);        entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);      } -    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), -            TII->get(TargetOpcode::COPY), DestReg) -      .addReg(IncomingReg); +    // Give the target the possibility to handle special cases, fall through otherwise +    PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(), +                                  IncomingReg, DestReg);    }    // Update live variable information if there is any.    if (LV) { -    MachineInstr &PHICopy = *std::prev(AfterPHIsIt); -      if (IncomingReg) {        LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); @@ -302,7 +303,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,        // killed.  Note that because the value is defined in several places (once        // each for each incoming block), the "def" block and instruction fields        // for the VarInfo is not filled in. -      LV->addVirtualRegisterKilled(IncomingReg, PHICopy); +      LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);      }      // Since we are going to be deleting the PHI node, if it is the last use of @@ -312,15 +313,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,      // If the result is dead, update LV.      if (isDead) { -      LV->addVirtualRegisterDead(DestReg, PHICopy); +      LV->addVirtualRegisterDead(DestReg, *PHICopy);        LV->removeVirtualRegisterDead(DestReg, *MPhi);      }    }    // Update LiveIntervals for the new copy or implicit def.    
if (LIS) { -    SlotIndex DestCopyIndex = -        LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt)); +    SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy);      SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);      if (IncomingReg) { @@ -406,9 +406,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,            if (DefMI->isImplicitDef())              ImpDefs.insert(DefMI);        } else { -        NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), -                            TII->get(TargetOpcode::COPY), IncomingReg) -                        .addReg(SrcReg, 0, SrcSubReg); +        NewSrcInstr = +            TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(), +                                     SrcReg, SrcSubReg, IncomingReg);        }      } @@ -457,7 +457,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,            }          } else {            // We just inserted this copy. -          KillInst = std::prev(InsertPos); +          KillInst = NewSrcInstr;          }        }        assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index aae6595f417..c97bb4cd093 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6410,3 +6410,40 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,        return true;    }  } + +MachineInstr *SIInstrInfo::createPHIDestinationCopy( +    MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt, +    const DebugLoc &DL, Register Src, Register Dst) const { +  auto Cur = MBB.begin(); +  if (Cur != MBB.end()) +    do { +      if (!Cur->isPHI() && Cur->readsRegister(Dst)) +        return BuildMI(MBB, Cur, DL, get(TargetOpcode::COPY), Dst).addReg(Src); +      ++Cur; +    } while (Cur != MBB.end() && Cur != LastPHIIt); + +  return 
TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src, +                                                   Dst); +} + +MachineInstr *SIInstrInfo::createPHISourceCopy( +    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, +    const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const { +  if (InsPt != MBB.end() && +      (InsPt->getOpcode() == AMDGPU::SI_IF || +       InsPt->getOpcode() == AMDGPU::SI_ELSE || +       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) && +      InsPt->definesRegister(Src)) { +    InsPt++; +    return BuildMI(MBB, InsPt, InsPt->getDebugLoc(), +                   get(ST.isWave32() ? AMDGPU::S_MOV_B32_term +                                     : AMDGPU::S_MOV_B64_term), +                   Dst) +        .addReg(Src, 0, SrcSubReg) +        .addReg(AMDGPU::EXEC, RegState::Implicit); +  } +  return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg, +                                              Dst); +} + +bool llvm::SIInstrInfo::isWave32() const { return ST.isWave32(); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 2e629c47256..04671a073d3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -954,6 +954,19 @@ public:    bool isBasicBlockPrologue(const MachineInstr &MI) const override; +  MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB, +                                         MachineBasicBlock::iterator InsPt, +                                         const DebugLoc &DL, Register Src, +                                         Register Dst) const override; + +  MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB, +                                    MachineBasicBlock::iterator InsPt, +                                    const DebugLoc &DL, Register Src, +                                    Register SrcSubReg, +                                    Register Dst) const override; + +  
bool isWave32() const; +    /// Return a partially built integer add instruction without carry.    /// Caller must add source operands.    /// For pre-GFX9 it will generate unused carry destination operand. diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 1a51b7ebffa..6f9abd3a8d9 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -98,6 +98,8 @@ private:    void emitLoop(MachineInstr &MI);    void emitEndCf(MachineInstr &MI); +  Register getSaveExec(MachineInstr* MI); +    void findMaskOperands(MachineInstr &MI, unsigned OpNo,                          SmallVectorImpl<MachineOperand> &Src) const; @@ -175,17 +177,31 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,    return true;  } +Register SILowerControlFlow::getSaveExec(MachineInstr *MI) { +  MachineBasicBlock *MBB = MI->getParent(); +  MachineOperand &SaveExec = MI->getOperand(0); +  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister); + +  Register SaveExecReg = SaveExec.getReg(); +  unsigned FalseTermOpc = +      TII->isWave32() ? 
AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term; +  MachineBasicBlock::iterator I = (MI); +  MachineBasicBlock::iterator J = std::next(I); +  if (J != MBB->end() && J->getOpcode() == FalseTermOpc && +      J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) { +    SaveExecReg = J->getOperand(0).getReg(); +    J->eraseFromParent(); +  } +  return SaveExecReg; +} +  void SILowerControlFlow::emitIf(MachineInstr &MI) {    MachineBasicBlock &MBB = *MI.getParent();    const DebugLoc &DL = MI.getDebugLoc();    MachineBasicBlock::iterator I(&MI); - -  MachineOperand &SaveExec = MI.getOperand(0); -  MachineOperand &Cond = MI.getOperand(1); -  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister && -         Cond.getSubReg() == AMDGPU::NoSubRegister); - -  Register SaveExecReg = SaveExec.getReg(); +  Register SaveExecReg = getSaveExec(&MI); +  MachineOperand& Cond = MI.getOperand(1); +  assert(Cond.getSubReg() == AMDGPU::NoSubRegister);    MachineOperand &ImpDefSCC = MI.getOperand(4);    assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef()); @@ -266,8 +282,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {    MachineBasicBlock &MBB = *MI.getParent();    const DebugLoc &DL = MI.getDebugLoc(); -  Register DstReg = MI.getOperand(0).getReg(); -  assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister); +  Register DstReg = getSaveExec(&MI);    bool ExecModified = MI.getOperand(3).getImm() != 0;    MachineBasicBlock::iterator Start = MBB.begin(); @@ -339,7 +354,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {  void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {    MachineBasicBlock &MBB = *MI.getParent();    const DebugLoc &DL = MI.getDebugLoc(); -  auto Dst = MI.getOperand(0).getReg(); +  auto Dst = getSaveExec(&MI);    // Skip ANDing with exec if the break condition is already masked by exec    // because it is a V_CMP in the same basic block. 
(We know the break @@ -400,13 +415,17 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {  void SILowerControlFlow::emitEndCf(MachineInstr &MI) {    MachineBasicBlock &MBB = *MI.getParent(); +  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); +  unsigned CFMask = MI.getOperand(0).getReg(); +  MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);    const DebugLoc &DL = MI.getDebugLoc(); -  MachineBasicBlock::iterator InsPt = MBB.begin(); -  MachineInstr *NewMI = -      BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec) -          .addReg(Exec) -          .add(MI.getOperand(0)); +  MachineBasicBlock::iterator InsPt = +      Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def)) +                               : MBB.begin(); +  MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec) +                            .addReg(Exec) +                            .add(MI.getOperand(0));    if (LIS)      LIS->ReplaceMachineInstrInMaps(MI, *NewMI); diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir index 9417927074b..e2e6ea76103 100644 --- a/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-assertion.mir @@ -26,8 +26,8 @@ body:             |  # CHECK-LABEL: name:            foo  # CHECK:   bb.3: -# CHECK-NEXT:     %3:sreg_32_xm0 = COPY killed %4  # CHECK-NEXT:     dead %2:sreg_32_xm0 = IMPLICIT_DEF +# CHECK-NEXT:     %3:sreg_32_xm0 = COPY killed %4  # CHECK-NEXT:     S_NOP 0, implicit killed %3 diff --git a/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir new file mode 100644 index 00000000000..3c99cc7c19d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/phi-elimination-end-cf.mir @@ -0,0 +1,54 @@ +# RUN: llc -mtriple amdgcn -run-pass livevars -run-pass phi-node-elimination -verify-machineinstrs -o - %s | FileCheck %s + +# CHECK-LABEL:  phi-cf-test +# CHECK: 
bb.0: +# CHECK:     [[COND:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 +# CHECK:     [[IF_SOURCE0:%[0-9]+]]:sreg_64 = SI_IF [[COND]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +# CHECK:     [[IF_INPUT_REG:%[0-9]+]]:sreg_64 =  S_MOV_B64_term killed [[IF_SOURCE0]], implicit $exec + +# CHECK: bb.1: +# CHECK:     [[END_CF_ARG:%[0-9]+]]:sreg_64 = COPY killed [[IF_INPUT_REG]] +# CHECK:     SI_END_CF killed [[END_CF_ARG]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + +# CHECK: bb.2: +# CHECK:     [[IF_SOURCE1:%[0-9]+]]:sreg_64 = SI_IF [[COND]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +# CHECK:     [[IF_INPUT_REG]]:sreg_64 =  S_MOV_B64_term killed [[IF_SOURCE1]], implicit $exec + + +... +--- +name:            phi-cf-test +tracksRegLiveness: true +body:             | + +  bb.0: +    successors: %bb.3(0x40000000), %bb.2(0x40000000) +    liveins: $vgpr0 + +    %5:vgpr_32(s32) = COPY $vgpr0 +    %0:sreg_64 = V_CMP_EQ_U32_e64 0, %5(s32), implicit $exec +    %18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +    %22:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +    S_BRANCH %bb.3 + +  bb.2: +    successors: %bb.3(0x80000000) + +    %24:sreg_64 = PHI %20, %bb.3, %22, %bb.0 +    %23:vgpr_32 = PHI %19, %bb.3, %18, %bb.0 +    SI_END_CF %24, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +    %3:vgpr_32, dead %10:sreg_64 = nsw V_ADD_I32_e64 1, %23, 0, implicit $exec + +  bb.3: +    successors: %bb.3(0x40000000), %bb.2(0x40000000) + +    %4:vgpr_32 = PHI %19, %bb.3, %3, %bb.2, %18, %bb.0 +    %15:sreg_32_xm0 = S_MOV_B32 61440 +    %16:sreg_32_xm0 = S_MOV_B32 -1 +    %17:sreg_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3 +    BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1) +    
%19:vgpr_32 = COPY %4 +    %20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +    S_BRANCH %bb.3 + +... | 

