diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-03-28 14:01:39 +0000 | 
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-03-28 14:01:39 +0000 | 
| commit | a353fd572abbbf00f5ccd496bc2571fb23c517e3 (patch) | |
| tree | 3e1d5de7cdd01ab9fcd49803b2da2acf9942f87a /llvm/lib | |
| parent | c325be6cefde8513b80145d8c86c536df3f82fe0 (diff) | |
| download | bcm5719-llvm-a353fd572abbbf00f5ccd496bc2571fb23c517e3.tar.gz bcm5719-llvm-a353fd572abbbf00f5ccd496bc2571fb23c517e3.zip | |
AMDGPU: Make exec mask optimizations more resistant to block splits
Also improve the check for SALU instructions to also ignore
implicit_def and other fake instructions.
llvm-svn: 357170
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 81 | 
3 files changed, 84 insertions, 22 deletions
| diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 7639a3fe138..370b8cf2cfb 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2453,6 +2453,27 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const    return false;  } +bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI, +                              const MachineInstr &MI) const { +  if (MI.isMetaInstruction()) +    return false; + +  // This won't read exec if this is an SGPR->SGPR copy. +  if (MI.isCopyLike()) { +    if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg())) +      return true; + +    // Make sure this isn't copying exec as a normal operand +    return MI.readsRegister(AMDGPU::EXEC, &RI); +  } + +  // Be conservative with any unhandled generic opcodes. +  if (!isTargetSpecificOpcode(MI.getOpcode())) +    return true; + +  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI); +} +  bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {    switch (Imm.getBitWidth()) {    case 32: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 51b5df93fef..13e3dbd3cfe 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -624,6 +624,10 @@ public:    /// Whether we must prevent this instruction from executing with EXEC = 0.    bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; +  /// Returns true if the instruction could potentially depend on the value of +  /// exec. If false, exec dependencies may safely be ignored. 
+  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const; +    bool isInlineConstant(const APInt &Imm) const;    bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const; diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index f5724a71e1d..df06ba5a692 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -33,10 +33,22 @@ using namespace llvm;  namespace {  class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { +private: +  const SIRegisterInfo *TRI; +  const SIInstrInfo *TII; +  MachineRegisterInfo *MRI; +  public: -  static char ID; +  MachineBasicBlock::iterator skipIgnoreExecInsts( +    MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const; + +    MachineBasicBlock::iterator skipIgnoreExecInstsTrivialSucc( +      MachineBasicBlock *&MBB, +      MachineBasicBlock::iterator It) const;  public: +  static char ID; +    SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) {      initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry());    } @@ -102,6 +114,45 @@ static MachineInstr* getOrExecSource(const MachineInstr &MI,    return SaveExecInst;  } +/// Skip over instructions that don't care about the exec mask. +MachineBasicBlock::iterator SIOptimizeExecMaskingPreRA::skipIgnoreExecInsts( +  MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const { +  for ( ; I != E; ++I) { +    if (TII->mayReadEXEC(*MRI, *I)) +      break; +  } + +  return I; +} + +// Skip to the next instruction, ignoring debug instructions, and trivial block +// boundaries (blocks that have one (typically fallthrough) successor, and the +// successor has one predecessor. 
+MachineBasicBlock::iterator +SIOptimizeExecMaskingPreRA::skipIgnoreExecInstsTrivialSucc( +  MachineBasicBlock *&MBB, +  MachineBasicBlock::iterator It) const { + +  do { +    It = skipIgnoreExecInsts(It, MBB->end()); +    if (It != MBB->end() || MBB->succ_size() != 1) +      break; + +    // If there is one trivial successor, advance to the next block. +    MachineBasicBlock *Succ = *MBB->succ_begin(); + +    // TODO: Is this really necessary? +    if (!MBB->isLayoutSuccessor(Succ)) +      break; + +    It = Succ->begin(); +    MBB = Succ; +  } while (true); + +  return It; +} + +  // Optimize sequence  //    %sel = V_CNDMASK_B32_e64 0, 1, %cc  //    %cmp = V_CMP_NE_U32 1, %1 @@ -227,8 +278,10 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {      return false;    const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); -  const SIRegisterInfo *TRI = ST.getRegisterInfo(); -  const SIInstrInfo *TII = ST.getInstrInfo(); +  TRI = ST.getRegisterInfo(); +  TII = ST.getInstrInfo(); +  MRI = &MF.getRegInfo(); +    MachineRegisterInfo &MRI = MF.getRegInfo();    LiveIntervals *LIS = &getAnalysis<LiveIntervals>();    DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI}); @@ -313,25 +366,9 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {      if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))        continue; -    const MachineBasicBlock* Succ = *MBB.succ_begin(); -    if (!MBB.isLayoutSuccessor(Succ)) -      continue; - -    auto I = std::next(Lead); - -    for ( ; I != E; ++I) { -      if (I->isDebugInstr()) -        continue; - -      if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI)) -        break; -    } - -    if (I != E) -      continue; - -    auto NextLead = skipDebugInstructionsForward(Succ->begin(), Succ->end()); -    if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) || +    MachineBasicBlock *TmpMBB = &MBB; +    auto NextLead = 
skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead)); +    if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI) ||          !getOrExecSource(*NextLead, *TII, MRI))        continue; | 

