diff options
author | Sam Parker <sam.parker@arm.com> | 2019-12-20 09:32:36 +0000 |
---|---|---|
committer | Sam Parker <sam.parker@arm.com> | 2019-12-20 09:34:18 +0000 |
commit | acbc9aed726d4b7428691e026a214cb26ee2cf94 (patch) | |
tree | 93e611317cbbc73893c2dad4d901d55b864d79f9 /llvm/lib | |
parent | 0ca9d2fd39264054501927ba6d3c5330159458d7 (diff) | |
download | bcm5719-llvm-acbc9aed726d4b7428691e026a214cb26ee2cf94.tar.gz bcm5719-llvm-acbc9aed726d4b7428691e026a214cb26ee2cf94.zip |
[ARM][MVE] Fixes for tail predication.
1) Fix an issue with the incorrect value being used for the number of
elements being passed to [d|w]lstp. We were trying to check that
the value was available at LoopStart, but this didn't consider
that the last instruction in the block could also define the
register. Two helpers, isReachingDefLiveOut and getLocalLiveOutMIDef,
have been added to ReachingDefAnalysis (RDA) for this.
2) Add code that tries to move either the element count def or the
insertion point so that we can perform more tail predication.
3) Related to (1), the same off-by-one could prevent us from
generating a low-overhead loop when a mov lr was the last
instruction in the block.
4) Fix up some instruction attributes so that not all the
low-overhead loop instructions are labelled as branches and
terminators - as this is not true for dls/dlstp.
Differential Revision: https://reviews.llvm.org/D71609
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/ReachingDefAnalysis.cpp | 46 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrThumb2.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp | 59 |
4 files changed, 102 insertions, 17 deletions
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index e5b422e0b7e..61ae3b75ab5 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -231,15 +231,15 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, MachineBasicBlock *MBB = Def->getParent(); MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); while (++MI != MBB->end()) { + // If/when we find a new reaching def, we know that there's no more uses + // of 'Def'. + if (getReachingMIDef(&*MI, PhysReg) != Def) + return; + for (auto &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg) continue; - // If/when we find a new reaching def, we know that there's no more uses - // of 'Def'. - if (getReachingMIDef(&*MI, PhysReg) != Def) - return; - Uses.push_back(&*MI); if (MO.isKill()) return; @@ -272,6 +272,42 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { return false; } +bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return false; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(MI, PhysReg); + if (getReachingDef(Last, PhysReg) != Def) + return false; + + // Finally check that the last instruction doesn't redefine the register. 
+ for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return false; + + return true; +} + +MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, + int PhysReg) { + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return nullptr; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(Last, PhysReg); + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return Last; + + return Def < 0 ? nullptr : getInstFromId(MBB, Def); +} + MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI, int PhysReg) { auto I = MachineBasicBlock::reverse_iterator(MI); diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 1b631b4b9e1..89458060585 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5717,6 +5717,8 @@ class MVE_WLSTP<string asm, bits<2> size> let Inst{13} = 0b0; let Inst{11} = label{0}; let Inst{10-1} = label{10-1}; + let isBranch = 1; + let isTerminator = 1; } def MVE_DLSTP_8 : MVE_DLSTP<"dlstp.8", 0b00>; @@ -5745,6 +5747,8 @@ def MVE_LETP : MVE_loltp_end<(outs GPRlr:$LRout), let Inst{13} = 0b0; let Inst{11} = label{0}; let Inst{10-1} = label{10-1}; + let isBranch = 1; + let isTerminator = 1; } def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index 00921930e71..d07d6ec119f 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5189,8 +5189,6 @@ class t2LOL<dag oops, dag iops, string asm, string ops> let Inst{31-23} = 0b111100000; let Inst{15-14} = 0b11; let Inst{0} = 0b1; - let isBranch = 1; - let isTerminator = 1; let DecoderMethod = "DecodeLOLoop"; let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; } @@ -5207,13 +5205,13 @@ def t2WLS : t2LOL<(outs GPRlr:$LR), 
let Inst{11} = label{0}; let Inst{10-1} = label{10-1}; let usesCustomInserter = 1; + let isBranch = 1; + let isTerminator = 1; } def t2DLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn), "dls", "$LR, $Rn"> { bits<4> Rn; - let isBranch = 0; - let isTerminator = 0; let Inst{22-20} = 0b100; let Inst{19-16} = Rn{3-0}; let Inst{13-1} = 0b1000000000000; @@ -5229,6 +5227,8 @@ def t2LEUpdate : t2LOL<(outs GPRlr:$LRout), let Inst{11} = label{0}; let Inst{10-1} = label{10-1}; let usesCustomInserter = 1; + let isBranch = 1; + let isTerminator = 1; } def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> { @@ -5237,6 +5237,8 @@ def t2LE : t2LOL<(outs ), (ins lelabel_u11:$label), "le", "$label"> { let Inst{13-12} = 0b00; let Inst{11} = label{0}; let Inst{10-1} = label{10-1}; + let isBranch = 1; + let isTerminator = 1; } def t2DoLoopStart : diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index ec62a6975f0..90bed3243ec 100644 --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -23,6 +23,7 @@ #include "ARMBasicBlockInfo.h" #include "ARMSubtarget.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" @@ -268,12 +269,15 @@ MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) { // Find an insertion point: // - Is there a (mov lr, Count) before Start? If so, and nothing else writes // to Count before Start, we can insert at that mov. + if (auto *LRDef = RDA->getReachingMIDef(Start, ARM::LR)) + if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg)) + return LRDef; + // - Is there a (mov lr, Count) after Start? If so, and nothing else writes // to Count after Start, we can insert at that mov. 
- if (auto *LRDef = RDA->getReachingMIDef(&MBB->back(), ARM::LR)) { + if (auto *LRDef = RDA->getLocalLiveOutMIDef(MBB, ARM::LR)) if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg)) return LRDef; - } // We've found no suitable LR def and Start doesn't use LR directly. Can we // just define LR anyway? @@ -283,6 +287,32 @@ MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) { return nullptr; } +// Can we safely move 'From' to just before 'To'? To satisfy this, 'From' must +// not define a register that is used by any instructions, after and including, +// 'To'. These instructions also must not redefine any of Froms operands. +template<typename Iterator> +static bool IsSafeToMove(MachineInstr *From, MachineInstr *To, ReachingDefAnalysis *RDA) { + SmallSet<int, 2> Defs; + // First check that From would compute the same value if moved. + for (auto &MO : From->operands()) { + if (!MO.isReg() || MO.isUndef() || !MO.getReg()) + continue; + if (MO.isDef()) + Defs.insert(MO.getReg()); + else if (!RDA->hasSameReachingDef(From, To, MO.getReg())) + return false; + } + + // Now walk checking that the rest of the instructions will compute the same + // value. + for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) { + for (auto &MO : I->operands()) + if (MO.isReg() && MO.getReg() && MO.isUse() && Defs.count(MO.getReg())) + return false; + } + return true; +} + void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA, MachineLoopInfo *MLI) { @@ -369,13 +399,26 @@ void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, return; } - // We can't perform TP if the register does not hold the same value at - // InsertPt as the liveout value. + // The element count register maybe defined after InsertPt, in which case we + // need to try to move either InsertPt or the def so that the [w|d]lstp can + // use the value. 
MachineBasicBlock *InsertBB = InsertPt->getParent(); - if (!RDA->hasSameReachingDef(InsertPt, &InsertBB->back(), - NumElements)) { - CannotTailPredicate = true; - return; + if (!RDA->isReachingDefLiveOut(InsertPt, NumElements)) { + if (auto *ElemDef = RDA->getLocalLiveOutMIDef(InsertBB, NumElements)) { + if (IsSafeToMove<MachineBasicBlock::reverse_iterator>(ElemDef, InsertPt, RDA)) { + ElemDef->removeFromParent(); + InsertBB->insert(MachineBasicBlock::iterator(InsertPt), ElemDef); + LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: " + << *ElemDef); + } else if (IsSafeToMove<MachineBasicBlock::iterator>(InsertPt, ElemDef, RDA)) { + InsertPt->removeFromParent(); + InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), InsertPt); + LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); + } else { + CannotTailPredicate = true; + return; + } + } } // Especially in the case of while loops, InsertBB may not be the |