diff options
author | Sanjoy Das <sanjoy@playingwithpointers.com> | 2015-07-27 23:27:51 +0000 |
---|---|---|
committer | Sanjoy Das <sanjoy@playingwithpointers.com> | 2015-07-27 23:27:51 +0000 |
commit | 93b3504aa82db9fe6445306277528d2512d64764 (patch) | |
tree | 6161f20bb53d252067bacb68791c898ae7259dd2 /llvm/lib | |
parent | c3182d8c4320c9f76fb2cf01178bd6a3d340cb5b (diff) | |
download | bcm5719-llvm-93b3504aa82db9fe6445306277528d2512d64764.tar.gz bcm5719-llvm-93b3504aa82db9fe6445306277528d2512d64764.zip |
[LSR] Generate and use zero extends
Summary:
If a scale or a base register can be rewritten as "Zext({A,+,1})" then
LSR will now consider a formula of that form in its normal cost
computation.
Depends on D9180
Reviewers: qcolombet, atrick
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D9181
llvm-svn: 243348
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 160 |
1 files changed, 139 insertions, 21 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 773777ac804..059b10ef73f 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -256,9 +256,22 @@ struct Formula { /// live in an add immediate field rather than a register. int64_t UnfoldedOffset; + /// ZeroExtendScaledReg - This formula zero extends the scale register to + /// ZeroExtendType before its use. + bool ZeroExtendScaledReg; + + /// ZeroExtendBaseReg - This formula zero extends all the base registers to + /// ZeroExtendType before their use. + bool ZeroExtendBaseReg; + + /// ZeroExtendType - The destination type of the zero extension implied by + /// the above two booleans. + Type *ZeroExtendType; + Formula() : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), - ScaledReg(nullptr), UnfoldedOffset(0) {} + ScaledReg(nullptr), UnfoldedOffset(0), ZeroExtendScaledReg(false), + ZeroExtendBaseReg(false), ZeroExtendType(nullptr) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); @@ -413,10 +426,12 @@ size_t Formula::getNumRegs() const { /// getType - Return the type of this formula, if it has one, or null /// otherwise. This type is meaningless except for the bit size. Type *Formula::getType() const { - return !BaseRegs.empty() ? BaseRegs.front()->getType() : - ScaledReg ? ScaledReg->getType() : - BaseGV ? BaseGV->getType() : - nullptr; + return ZeroExtendType + ? ZeroExtendType + : !BaseRegs.empty() + ? BaseRegs.front()->getType() + : ScaledReg ? ScaledReg->getType() + : BaseGV ? BaseGV->getType() : nullptr; } /// DeleteBaseReg - Delete the given base reg from the BaseRegs list. @@ -457,7 +472,10 @@ void Formula::print(raw_ostream &OS) const { } for (const SCEV *BaseReg : BaseRegs) { if (!First) OS << " + "; else First = false; - OS << "reg(" << *BaseReg << ')'; + if (ZeroExtendBaseReg) + OS << "reg(zext " << *BaseReg << " to " << *ZeroExtendType << ')'; + else + OS << "reg(" << *BaseReg << ')'; } if (HasBaseReg && BaseRegs.empty()) { if (!First) OS << " + "; else First = false; @@ -469,9 +487,12 @@ void Formula::print(raw_ostream &OS) const { if (Scale != 0) { if (!First) OS << " + "; else First = false; OS << Scale << "*reg("; - if (ScaledReg) - OS << *ScaledReg; - else + if (ScaledReg) { + if (ZeroExtendScaledReg) + OS << "(zext " << *ScaledReg << " to " << *ZeroExtendType << ')'; + else + OS << *ScaledReg; + } else OS << "<unknown>"; OS << ')'; } @@ -1732,6 +1753,7 @@ class LSRInstance { void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateZExts(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateCrossUseConstantOffsets(); void GenerateAllReuseFormulae(); @@ -3627,6 +3649,64 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { } } +/// GenerateZExts - If a scale or a base register can be rewritten as +/// "Zext({A,+,1})" then consider a formula of that form. +void LSRInstance::GenerateZExts(LSRUse &LU, unsigned LUIdx, Formula Base) { + // Don't bother with symbolic values. + if (Base.BaseGV) + return; + + auto CanBeNarrowed = [&](const SCEV *Reg) -> const SCEV * { + // Check if the register is an increment can be rewritten as zext(R) where + // the zext is free. + + const auto *RegAR = dyn_cast_or_null<SCEVAddRecExpr>(Reg); + if (!RegAR) + return nullptr; + + const auto *ZExtStart = dyn_cast<SCEVZeroExtendExpr>(RegAR->getStart()); + const auto *ConstStep = + dyn_cast<SCEVConstant>(RegAR->getStepRecurrence(SE)); + if (!ZExtStart || !ConstStep || ConstStep->getValue()->getValue() != 1) + return nullptr; + + const SCEV *NarrowStart = ZExtStart->getOperand(); + if (!TTI.isZExtFree(NarrowStart->getType(), ZExtStart->getType())) + return nullptr; + + const auto *NarrowAR = dyn_cast<SCEVAddRecExpr>( + SE.getAddRecExpr(NarrowStart, SE.getConstant(NarrowStart->getType(), 1), + RegAR->getLoop(), RegAR->getNoWrapFlags())); + + if (!NarrowAR || !NarrowAR->getNoWrapFlags(SCEV::FlagNUW)) + return nullptr; + + return NarrowAR; + }; + + if (Base.ScaledReg && !Base.ZeroExtendType) + if (const SCEV *S = CanBeNarrowed(Base.ScaledReg)) { + Formula F = Base; + F.ZeroExtendType = Base.ScaledReg->getType(); + F.ZeroExtendScaledReg = true; + F.ScaledReg = S; + + if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + InsertFormula(LU, LUIdx, F); + } + + if (Base.BaseRegs.size() == 1 && !Base.ZeroExtendType) + if (const SCEV *S = CanBeNarrowed(Base.BaseRegs[0])) { + Formula F = Base; + F.ZeroExtendType = Base.BaseRegs[0]->getType(); + F.ZeroExtendBaseReg = true; + F.BaseRegs[0] = S; + + if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + InsertFormula(LU, LUIdx, F); + } +} + namespace { /// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to @@ -3846,6 +3926,8 @@ LSRInstance::GenerateAllReuseFormulae() { LSRUse &LU = Uses[LUIdx]; for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) GenerateTruncates(LU, LUIdx, LU.Formulae[i]); + for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i) + GenerateZExts(LU, LUIdx, LU.Formulae[i]); } GenerateCrossUseConstantOffsets(); @@ -4483,13 +4565,28 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // If we're expanding for a post-inc user, make the post-inc adjustment. PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); - Reg = TransformForPostIncUse(Denormalize, Reg, - LF.UserInst, LF.OperandValToReplace, - Loops, SE, DT); - - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP))); + const SCEV *ExtendedReg = + F.ZeroExtendBaseReg ? SE.getZeroExtendExpr(Reg, F.ZeroExtendType) : Reg; + + const SCEV *PostIncReg = + TransformForPostIncUse(Denormalize, ExtendedReg, LF.UserInst, + LF.OperandValToReplace, Loops, SE, DT); + if (PostIncReg == ExtendedReg) { + Value *Expanded = Rewriter.expandCodeFor(Reg, nullptr, IP); + if (F.ZeroExtendBaseReg) + Expanded = new ZExtInst(Expanded, F.ZeroExtendType, "", IP); + Ops.push_back(SE.getUnknown(Expanded)); + } else { + Ops.push_back( + SE.getUnknown(Rewriter.expandCodeFor(PostIncReg, nullptr, IP))); + } } + // Note on post-inc uses and zero extends -- since the no-wrap behavior for + // the post-inc SCEV can be different from the no-wrap behavior of the pre-inc + // SCEV, if a post-inc transform is required we do the zero extension on the + // pre-inc expression before doing the post-inc transform. + // Expand the ScaledReg portion. Value *ICmpScaledV = nullptr; if (F.Scale != 0) { @@ -4497,22 +4594,33 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // If we're expanding for a post-inc user, make the post-inc adjustment. PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); - ScaledS = TransformForPostIncUse(Denormalize, ScaledS, - LF.UserInst, LF.OperandValToReplace, - Loops, SE, DT); + const SCEV *ExtendedScaleS = + F.ZeroExtendScaledReg ? SE.getZeroExtendExpr(ScaledS, F.ZeroExtendType) + : ScaledS; + const SCEV *PostIncScaleS = + TransformForPostIncUse(Denormalize, ExtendedScaleS, LF.UserInst, + LF.OperandValToReplace, Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { // Expand ScaleReg as if it was part of the base regs. + Value *Expanded = nullptr; + if (PostIncScaleS == ExtendedScaleS) { + Expanded = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + if (F.ZeroExtendScaledReg) + Expanded = new ZExtInst(Expanded, F.ZeroExtendType, "", IP); + } else { + Expanded = Rewriter.expandCodeFor(PostIncScaleS, nullptr, IP); + } + if (F.Scale == 1) - Ops.push_back( - SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP))); + Ops.push_back(SE.getUnknown(Expanded)); else { // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + ICmpScaledV = Expanded; } } else { // Otherwise just expand the scaled register and an explicit scale, @@ -4526,7 +4634,17 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)); + + Value *Expanded = nullptr; + if (PostIncScaleS == ExtendedScaleS) { + Expanded = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + if (F.ZeroExtendScaledReg) + Expanded = new ZExtInst(Expanded, F.ZeroExtendType, "", IP); + } else { + Expanded = Rewriter.expandCodeFor(PostIncScaleS, nullptr, IP); + } + + ScaledS = SE.getUnknown(Expanded); if (F.Scale != 1) ScaledS = SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); |