diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 81 |
3 files changed, 84 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 7639a3fe138..370b8cf2cfb 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2453,6 +2453,27 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const return false; } +bool SIInstrInfo::mayReadEXEC(const MachineRegisterInfo &MRI, + const MachineInstr &MI) const { + if (MI.isMetaInstruction()) + return false; + + // This won't read exec if this is an SGPR->SGPR copy. + if (MI.isCopyLike()) { + if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg())) + return true; + + // Make sure this isn't copying exec as a normal operand + return MI.readsRegister(AMDGPU::EXEC, &RI); + } + + // Be conservative with any unhandled generic opcodes. + if (!isTargetSpecificOpcode(MI.getOpcode())) + return true; + + return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI); +} + bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { switch (Imm.getBitWidth()) { case 32: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 51b5df93fef..13e3dbd3cfe 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -624,6 +624,10 @@ public: /// Whether we must prevent this instruction from executing with EXEC = 0. bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const; + /// Returns true if the instruction could potentially depend on the value of + /// exec. If false, exec dependencies may safely be ignored. + bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const; + bool isInlineConstant(const APInt &Imm) const; bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const; diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index f5724a71e1d..df06ba5a692 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -33,10 +33,22 @@ using namespace llvm; namespace { class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { +private: + const SIRegisterInfo *TRI; + const SIInstrInfo *TII; + MachineRegisterInfo *MRI; + public: - static char ID; + MachineBasicBlock::iterator skipIgnoreExecInsts( + MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const; + + MachineBasicBlock::iterator skipIgnoreExecInstsTrivialSucc( + MachineBasicBlock *&MBB, + MachineBasicBlock::iterator It) const; public: + static char ID; + SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) { initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry()); } @@ -102,6 +114,45 @@ static MachineInstr* getOrExecSource(const MachineInstr &MI, return SaveExecInst; } +/// Skip over instructions that don't care about the exec mask. +MachineBasicBlock::iterator SIOptimizeExecMaskingPreRA::skipIgnoreExecInsts( + MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) const { + for ( ; I != E; ++I) { + if (TII->mayReadEXEC(*MRI, *I)) + break; + } + + return I; +} + +// Skip to the next instruction, ignoring debug instructions, and trivial block +// boundaries (blocks that have one (typically fallthrough) successor, and the +// successor has one predecessor. +MachineBasicBlock::iterator +SIOptimizeExecMaskingPreRA::skipIgnoreExecInstsTrivialSucc( + MachineBasicBlock *&MBB, + MachineBasicBlock::iterator It) const { + + do { + It = skipIgnoreExecInsts(It, MBB->end()); + if (It != MBB->end() || MBB->succ_size() != 1) + break; + + // If there is one trivial successor, advance to the next block. + MachineBasicBlock *Succ = *MBB->succ_begin(); + + // TODO: Is this really necessary? + if (!MBB->isLayoutSuccessor(Succ)) + break; + + It = Succ->begin(); + MBB = Succ; + } while (true); + + return It; +} + + // Optimize sequence // %sel = V_CNDMASK_B32_e64 0, 1, %cc // %cmp = V_CMP_NE_U32 1, %1 @@ -227,8 +278,10 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { return false; const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - const SIInstrInfo *TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); LiveIntervals *LIS = &getAnalysis<LiveIntervals>(); DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI}); @@ -313,25 +366,9 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI)) continue; - const MachineBasicBlock* Succ = *MBB.succ_begin(); - if (!MBB.isLayoutSuccessor(Succ)) - continue; - - auto I = std::next(Lead); - - for ( ; I != E; ++I) { - if (I->isDebugInstr()) - continue; - - if (!TII->isSALU(*I) || I->readsRegister(AMDGPU::EXEC, TRI)) - break; - } - - if (I != E) - continue; - - auto NextLead = skipDebugInstructionsForward(Succ->begin(), Succ->end()); - if (NextLead == Succ->end() || !isEndCF(*NextLead, TRI) || + MachineBasicBlock *TmpMBB = &MBB; + auto NextLead = skipIgnoreExecInstsTrivialSucc(TmpMBB, std::next(Lead)); + if (NextLead == TmpMBB->end() || !isEndCF(*NextLead, TRI) || !getOrExecSource(*NextLead, *TII, MRI)) continue; |