diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 70 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 16 |
4 files changed, 63 insertions, 29 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp index 7348b5b56c8..e1845e2e8e8 100644 --- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp +++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp @@ -344,7 +344,7 @@ bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const { auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst); assert(DstOpnd && DstOpnd->isReg()); auto DPPMovReg = DstOpnd->getReg(); - if (execMayBeModifiedBeforeUse(*MRI, DPPMovReg, MovMI)) { + if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) { LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same" " for all uses\n"); return false; diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index bcc3478c67b..74d77d32801 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -650,7 +650,7 @@ void SIFoldOperands::foldOperand( if (execMayBeModifiedBeforeUse(*MRI, UseMI->getOperand(UseOpIdx).getReg(), *OpToFold.getParent(), - UseMI)) + *UseMI)) return; UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32)); @@ -669,7 +669,7 @@ void SIFoldOperands::foldOperand( if (execMayBeModifiedBeforeUse(*MRI, UseMI->getOperand(UseOpIdx).getReg(), *OpToFold.getParent(), - UseMI)) + *UseMI)) return; // %vgpr = COPY %sgpr0 diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index d855f3f0e42..34741850f82 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6269,47 +6269,75 @@ MachineInstr *llvm::getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, } bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, - unsigned VReg, + Register VReg, const MachineInstr &DefMI, - const MachineInstr *UseMI) { + const MachineInstr &UseMI) { assert(MRI.isSSA() && "Must be run on SSA"); auto *TRI = MRI.getTargetRegisterInfo(); auto *DefBB = DefMI.getParent(); - if (UseMI) { + // Don't bother searching between blocks, although it is possible this block + // doesn't modify exec. + if (UseMI.getParent() != DefBB) + return true; + + const int MaxInstScan = 20; + int NumInst = 0; + + // Stop scan at the use. + auto E = UseMI.getIterator(); + for (auto I = std::next(DefMI.getIterator()); I != E; ++I) { + if (I->isDebugInstr()) + continue; + + if (++NumInst > MaxInstScan) + return true; + + if (I->modifiesRegister(AMDGPU::EXEC, TRI)) + return true; + } + + return false; +} + +bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, + Register VReg, + const MachineInstr &DefMI) { + assert(MRI.isSSA() && "Must be run on SSA"); + + auto *TRI = MRI.getTargetRegisterInfo(); + auto *DefBB = DefMI.getParent(); + + const int MaxUseInstScan = 10; + int NumUseInst = 0; + + for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) { // Don't bother searching between blocks, although it is possible this block // doesn't modify exec. - if (UseMI->getParent() != DefBB) + if (UseInst.getParent() != DefBB) return true; - } else { - int NumUse = 0; - const int MaxUseScan = 10; - - for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) { - if (UseInst.getParent() != DefBB) - return true; - if (NumUse++ > MaxUseScan) - return true; - } + if (++NumUseInst > MaxUseInstScan) + return true; } const int MaxInstScan = 20; - int NumScan = 0; + int NumInst = 0; - // Stop scan at the use if known. - auto E = UseMI ? UseMI->getIterator() : DefBB->end(); - for (auto I = std::next(DefMI.getIterator()); I != E; ++I) { + // Stop scan when we have seen all the uses. + for (auto I = std::next(DefMI.getIterator()); ; ++I) { if (I->isDebugInstr()) continue; - if (NumScan++ > MaxInstScan) + if (++NumInst > MaxInstScan) return true; + if (I->readsRegister(VReg)) + if (--NumUseInst == 0) + return false; + if (I->modifiesRegister(AMDGPU::EXEC, TRI)) return true; } - - return false; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index b8275b62272..1f3c659f9d9 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1016,13 +1016,19 @@ MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, MachineRegisterInfo &MRI); /// \brief Return false if EXEC is not changed between the def of \p VReg at \p -/// DefMI and uses. If \p UseMI is not specified, this checks all uses of \p -/// VReg. Should be run on SSA. Currently does not attempt to track between -/// blocks. +/// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not +/// attempt to track between blocks. bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, - unsigned VReg, + Register VReg, const MachineInstr &DefMI, - const MachineInstr *UseMI = nullptr); + const MachineInstr &UseMI); + +/// \brief Return false if EXEC is not changed between the def of \p VReg at \p +/// DefMI and all its uses. Should be run on SSA. Currently does not attempt to +/// track between blocks. +bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, + Register VReg, + const MachineInstr &DefMI); namespace AMDGPU { |