diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 81 |
1 files changed, 59 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index f5724a71e1d..df06ba5a692 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -33,10 +33,22 @@ using namespace llvm; namespace { class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { +private: + const SIRegisterInfo *TRI; + const SIInstrInfo *TII; + MachineRegisterInfo *MRI; + public: - static char ID; + MachineBasicBlock::iterator skipIgnoreExecInsts( + MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const; + + MachineBasicBlock::iterator skipIgnoreExecInstsTrivialSucc( + MachineBasicBlock *&MBB, + MachineBasicBlock::iterator It) const; public: + static char ID; + SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) { initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry()); } @@ -102,6 +114,45 @@ static MachineInstr* getOrExecSource(const MachineInstr &MI, return SaveExecInst; } +/// Skip over instructions that don't care about the exec mask. +MachineBasicBlock::iterator SIOptimizeExecMaskingPreRA::skipIgnoreExecInsts( + MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const { + for ( ; I != E; ++I) { + if (TII->mayReadEXEC(*MRI, *I)) + break; + } + + return I; +} + +// Skip to the next instruction, ignoring debug instructions, and trivial block +// boundaries (blocks that have one (typically fallthrough) successor, and the +// successor has one predecessor. +MachineBasicBlock::iterator +SIOptimizeExecMaskingPreRA::skipIgnoreExecInstsTrivialSucc( + MachineBasicBlock *&MBB, + MachineBasicBlock::iterator It) const { + + do { + It = skipIgnoreExecInsts(It, MBB->end()); + if (It != MBB->end() || MBB->succ_size() != 1) + break; + + // If there is one trivial successor, advance to the next block. + MachineBasicBlock *Succ = *MBB->succ_begin(); + + // TODO: Is this really necessary? + if (!MBB->isLayoutSuccessor(Succ)) + break; + + It = Succ->begin(); + MBB = Succ; + } while (true); + + return It; +} + + // Optimize sequence // %sel = V_CNDMASK_B32_e64 0, 1, %cc // %cmp = V_CMP_NE_U32 1, %1 @@ -227,8 +278,10 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { return false; const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); LiveIntervals *LIS = &getAnalysis<LiveIntervals>(); DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI}); @@ -313,25 +366,9 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI)) continue; - const MachineBasicBlock* Succ = *MBB.succ_begin(); - if (!MBB.isLayoutSuccessor(Succ)) - continue; - - auto I = std::next(Lead); - - for ( ; I != E; ++I) { - if (I->isDebugInstr()) - continue; - - if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI)) - break; - } - - if (I != E) - continue; - - auto NextLead = skipDebugInstructionsForward(Succ->begin(), Succ->end()); - if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) || + MachineBasicBlock *TmpMBB = &MBB; + auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead)); + if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI) || !getOrExecSource(*NextLead, *TII, MRI)) continue; |