diff options
author | Dan Gohman <gohman@apple.com> | 2010-04-07 22:27:08 +0000 |
---|---|---|
committer | Dan Gohman <gohman@apple.com> | 2010-04-07 22:27:08 +0000 |
commit | d006ab90dd86ff3bb0a6a20dcf399b3cc7082289 (patch) | |
tree | eeee1a8a9d4cf4811bf2d00e7fcf4ca8d6e96ebc | |
parent | 85ce9f4f3010c22253a5a0114e36b78a2142d016 (diff) | |
download | bcm5719-llvm-d006ab90dd86ff3bb0a6a20dcf399b3cc7082289.tar.gz bcm5719-llvm-d006ab90dd86ff3bb0a6a20dcf399b3cc7082289.zip |
Generalize IVUsers to track arbitrary expressions rather than expressions
explicitly split into stride-and-offset pairs. Also, add the
ability to track multiple post-increment loops on the same expression.
This refines the concept of "normalizing" SCEV expressions used for
post-increment uses, and introduces a dedicated utility routine for
normalizing and denormalizing expressions.
This fixes the expansion of expressions which are post-increment users
of more than one loop at a time. More broadly, this takes LSR another
step closer to being able to reason about more than one loop at a time.
llvm-svn: 100699
-rw-r--r-- | llvm/include/llvm/Analysis/IVUsers.h | 66 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/ScalarEvolutionExpander.h | 29 | ||||
-rw-r--r-- | llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h | 78 | ||||
-rw-r--r-- | llvm/lib/Analysis/IVUsers.cpp | 242 | ||||
-rw-r--r-- | llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 17 | ||||
-rw-r--r-- | llvm/lib/Analysis/ScalarEvolutionNormalization.cpp | 150 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 43 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 118 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/multiple-loop-post-inc.ll | 277 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll | 2 |
10 files changed, 743 insertions, 279 deletions
diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h index dc616ca2fd3..a887c830c95 100644 --- a/llvm/include/llvm/Analysis/IVUsers.h +++ b/llvm/include/llvm/Analysis/IVUsers.h @@ -16,6 +16,7 @@ #define LLVM_ANALYSIS_IVUSERS_H #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" #include "llvm/Support/ValueHandle.h" namespace llvm { @@ -26,17 +27,18 @@ class Value; class IVUsers; class ScalarEvolution; class SCEV; +class IVUsers; /// IVStrideUse - Keep track of one use of a strided induction variable. /// The Expr member keeps track of the expression, User is the actual user /// instruction of the operand, and 'OperandValToReplace' is the operand of /// the User that is the use. class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> { + friend class IVUsers; public: - IVStrideUse(IVUsers *P, const SCEV *S, const SCEV *Off, + IVStrideUse(IVUsers *P, const SCEV *E, Instruction* U, Value *O) - : CallbackVH(U), Parent(P), Stride(S), Offset(Off), - OperandValToReplace(O), IsUseOfPostIncrementedValue(false) { + : CallbackVH(U), Parent(P), Expr(E), OperandValToReplace(O) { } /// getUser - Return the user instruction for this use. @@ -53,23 +55,15 @@ public: /// this IVStrideUse. IVUsers *getParent() const { return Parent; } - /// getStride - Return the expression for the stride for the use. - const SCEV *getStride() const { return Stride; } + /// getExpr - Return the expression for the use. + const SCEV *getExpr() const { return Expr; } - /// setStride - Assign a new stride to this use. - void setStride(const SCEV *Val) { - Stride = Val; + /// setExpr - Assign a new expression to this use. + void setExpr(const SCEV *Val) { + Expr = Val; } - /// getOffset - Return the offset to add to a theoretical induction - /// variable that starts at zero and counts up by the stride to compute - /// the value for the use. This always has the same type as the stride. 
- const SCEV *getOffset() const { return Offset; } - - /// setOffset - Assign a new offset to this use. - void setOffset(const SCEV *Val) { - Offset = Val; - } + const SCEV *getStride(const Loop *L) const; /// getOperandValToReplace - Return the Value of the operand in the user /// instruction that this IVStrideUse is representing. @@ -83,37 +77,30 @@ public: OperandValToReplace = Op; } - /// isUseOfPostIncrementedValue - True if this should use the - /// post-incremented version of this IV, not the preincremented version. - /// This can only be set in special cases, such as the terminating setcc - /// instruction for a loop or uses dominated by the loop. - bool isUseOfPostIncrementedValue() const { - return IsUseOfPostIncrementedValue; + /// getPostIncLoops - Return the set of loops for which the expression has + /// been adjusted to use post-inc mode. + const PostIncLoopSet &getPostIncLoops() const { + return PostIncLoops; } - /// setIsUseOfPostIncrmentedValue - set the flag that indicates whether - /// this is a post-increment use. - void setIsUseOfPostIncrementedValue(bool Val) { - IsUseOfPostIncrementedValue = Val; - } + /// transformToPostInc - Transform the expression to post-inc form for the + /// given loop. + void transformToPostInc(const Loop *L); private: /// Parent - a pointer to the IVUsers that owns this IVStrideUse. IVUsers *Parent; - /// Stride - The stride for this use. - const SCEV *Stride; - - /// Offset - The offset to add to the base induction expression. - const SCEV *Offset; + /// Expr - The expression for this use. + const SCEV *Expr; /// OperandValToReplace - The Value of the operand in the user instruction /// that this IVStrideUse is representing. WeakVH OperandValToReplace; - /// IsUseOfPostIncrementedValue - True if this should use the - /// post-incremented version of this IV, not the preincremented version. 
- bool IsUseOfPostIncrementedValue; + /// PostIncLoops - The set of loops for which Expr has been adjusted to + /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept. + PostIncLoopSet PostIncLoops; /// Deleted - Implementation of CallbackVH virtual function to /// receive notification when the User is deleted. @@ -174,18 +161,13 @@ public: /// return true. Otherwise, return false. bool AddUsersIfInteresting(Instruction *I); - IVStrideUse &AddUser(const SCEV *Stride, const SCEV *Offset, + IVStrideUse &AddUser(const SCEV *Expr, Instruction *User, Value *Operand); /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. const SCEV *getReplacementExpr(const IVStrideUse &U) const; - /// getCanonicalExpr - Return a SCEV expression which computes the - /// value of the SCEV of the given IVStrideUse, ignoring the - /// isUseOfPostIncrementedValue flag. - const SCEV *getCanonicalExpr(const IVStrideUse &U) const; - typedef ilist<IVStrideUse>::iterator iterator; typedef ilist<IVStrideUse>::const_iterator const_iterator; iterator begin() { return IVUses.begin(); } diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h index dc9b73bd566..baf6946b8cf 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -15,6 +15,7 @@ #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/TargetFolder.h" #include <set> @@ -32,12 +33,12 @@ namespace llvm { InsertedExpressions; std::set<Value*> InsertedValues; - /// PostIncLoop - When non-null, expanded addrecs referring to the given - /// loop expanded in post-inc mode. 
For example, expanding {1,+,1}<L> in - /// post-inc mode returns the add instruction that adds one to the phi - /// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only - /// supported in non-canonical mode. - const Loop *PostIncLoop; + /// PostIncLoops - Addrecs referring to any of the given loops are expanded + /// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode + /// returns the add instruction that adds one to the phi for {0,+,1}<L>, + /// as opposed to a new phi starting at 1. This is only supported in + /// non-canonical mode. + PostIncLoopSet PostIncLoops; /// IVIncInsertPos - When this is non-null, addrecs expanded in the /// loop it indicates should be inserted with increments at @@ -62,7 +63,7 @@ namespace llvm { public: /// SCEVExpander - Construct a SCEVExpander in "canonical" mode. explicit SCEVExpander(ScalarEvolution &se) - : SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true), + : SE(se), IVIncInsertLoop(0), CanonicalMode(true), Builder(se.getContext(), TargetFolder(se.TD)) {} /// clear - Erase the contents of the InsertedExpressions map so that users @@ -89,14 +90,18 @@ namespace llvm { IVIncInsertPos = Pos; } - /// setPostInc - If L is non-null, enable post-inc expansion for addrecs - /// referring to the given loop. If L is null, disable post-inc expansion - /// completely. Post-inc expansion is only supported in non-canonical + /// setPostInc - Enable post-inc expansion for addrecs referring to the + /// given loops. Post-inc expansion is only supported in non-canonical /// mode. - void setPostInc(const Loop *L) { + void setPostInc(const PostIncLoopSet &L) { assert(!CanonicalMode && "Post-inc expansion is not supported in CanonicalMode"); - PostIncLoop = L; + PostIncLoops = L; + } + + /// clearPostInc - Disable all post-inc expansion. 
+ void clearPostInc() { + PostIncLoops.clear(); } /// disableCanonicalMode - Disable the behavior of expanding expressions in diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h b/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h new file mode 100644 index 00000000000..342e5937891 --- /dev/null +++ b/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h @@ -0,0 +1,78 @@ +//===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for working with "normalized" ScalarEvolution +// expressions. +// +// The following example illustrates post-increment uses and how normalized +// expressions help. +// +// for (i=0; i!=n; ++i) { +// ... +// } +// use(i); +// +// While the expression for most uses of i inside the loop is {0,+,1}<%L>, the +// expression for the use of i outside the loop is {1,+,1}<%L>, since i is +// incremented at the end of the loop body. This is inconvenient, since it +// suggests that we need two different induction variables, one that starts +// at 0 and one that starts at 1. We'd prefer to be able to think of these as +// the same induction variable, with uses inside the loop using the +// "pre-incremented" value, and uses after the loop using the +// "post-incremented" value. +// +// Expressions for post-incremented uses are represented as an expression +// paired with a set of loops for which the expression is in "post-increment" +// mode (there may be multiple loops). 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H +#define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H + +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { + +class Instruction; +class DominatorTree; +class Loop; +class ScalarEvolution; +class SCEV; +class Value; + +/// TransformKind - Different types of transformations that +/// TransformForPostIncUse can do. +enum TransformKind { + /// Normalize - Normalize according to the given loops. + Normalize, + /// NormalizeAutodetect - Detect post-inc opportunities on new expressions, + /// update the given loop set, and normalize. + NormalizeAutodetect, + /// Denormalize - Perform the inverse transform on the expression with the + /// given loop set. + Denormalize +}; + +/// PostIncLoopSet - A set of loops. +typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet; + +/// TransformForPostIncUse - Transform the given expression according to the +/// given transformation kind. +const SCEV *TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT); + +} + +#endif diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp index 47b5d4a0f08..467f9dd840b 100644 --- a/llvm/lib/Analysis/IVUsers.cpp +++ b/llvm/lib/Analysis/IVUsers.cpp @@ -62,120 +62,34 @@ static void CollectSubexprs(const SCEV *S, Ops.push_back(S); } -/// getSCEVStartAndStride - Compute the start and stride of this expression, -/// returning false if the expression is not a start/stride pair, or true if it -/// is. The stride must be a loop invariant expression, but the start may be -/// a mix of loop invariant and loop variant expressions. The start cannot, -/// however, contain an AddRec from a different loop, unless that loop is an -/// outer loop of the current loop. 
-static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop, - const SCEV *&Start, const SCEV *&Stride, - ScalarEvolution *SE, DominatorTree *DT) { - const SCEV *TheAddRec = Start; // Initialize to zero. - - // If the outer level is an AddExpr, the operands are all start values except - // for a nested AddRecExpr. - if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(SH)) { - for (unsigned i = 0, e = AE->getNumOperands(); i != e; ++i) - if (const SCEVAddRecExpr *AddRec = - dyn_cast<SCEVAddRecExpr>(AE->getOperand(i))) - TheAddRec = SE->getAddExpr(AddRec, TheAddRec); - else - Start = SE->getAddExpr(Start, AE->getOperand(i)); - } else if (isa<SCEVAddRecExpr>(SH)) { - TheAddRec = SH; - } else { - return false; // not analyzable. - } - - // Break down TheAddRec into its component parts. - SmallVector<const SCEV *, 4> Subexprs; - CollectSubexprs(TheAddRec, Subexprs, *SE); - - // Look for an addrec on the current loop among the parts. - const SCEV *AddRecStride = 0; - for (SmallVectorImpl<const SCEV *>::iterator I = Subexprs.begin(), - E = Subexprs.end(); I != E; ++I) { - const SCEV *S = *I; - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) - if (AR->getLoop() == L) { - *I = AR->getStart(); - AddRecStride = AR->getStepRecurrence(*SE); - break; - } - } - if (!AddRecStride) - return false; - - // Add up everything else into a start value (which may not be - // loop-invariant). - const SCEV *AddRecStart = SE->getAddExpr(Subexprs); - - // Use getSCEVAtScope to attempt to simplify other loops out of - // the picture. - AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop); - - Start = SE->getAddExpr(Start, AddRecStart); - - // If stride is an instruction, make sure it properly dominates the header. - // Otherwise we could end up with a use before def situation. 
- if (!isa<SCEVConstant>(AddRecStride)) { - BasicBlock *Header = L->getHeader(); - if (!AddRecStride->properlyDominates(Header, DT)) - return false; +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) { + // Anything loop-invariant is interesting. + if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L)) + return true; - DEBUG(dbgs() << "["; - WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); - dbgs() << "] Variable stride: " << *AddRecStride << "\n"); + // An addrec is interesting if it's affine or if it has an interesting start. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // Keep things simple. Don't touch loop-variant strides. + if (AR->getLoop() == L && (AR->isAffine() || !L->contains(I))) + return true; + // Otherwise recurse to see if the start value is interesting. + return isInteresting(AR->getStart(), I, L); } - Stride = AddRecStride; - return true; -} - -/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression -/// and now we need to decide whether the user should use the preinc or post-inc -/// value. If this user should use the post-inc version of the IV, return true. -/// -/// Choosing wrong here can break dominance properties (if we choose to use the -/// post-inc value when we cannot) or it can end up adding extra live-ranges to -/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we -/// should use the post-inc value). -static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, - const Loop *L, DominatorTree *DT) { - // If the user is in the loop, use the preinc value. - if (L->contains(User)) return false; - - BasicBlock *LatchBlock = L->getLoopLatch(); - if (!LatchBlock) + // An add is interesting if any of its operands is. 
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); + OI != OE; ++OI) + if (isInteresting(*OI, I, L)) + return true; return false; + } - // Ok, the user is outside of the loop. If it is dominated by the latch - // block, use the post-inc value. - if (DT->dominates(LatchBlock, User->getParent())) - return true; - - // There is one case we have to be careful of: PHI nodes. These little guys - // can live in blocks that are not dominated by the latch block, but (since - // their uses occur in the predecessor block, not the block the PHI lives in) - // should still use the post-inc value. Check for this case now. - PHINode *PN = dyn_cast<PHINode>(User); - if (!PN) return false; // not a phi, not dominated by latch block. - - // Look at all of the uses of IV by the PHI node. If any use corresponds to - // a block that is not dominated by the latch block, give up and use the - // preincremented value. - unsigned NumUses = 0; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == IV) { - ++NumUses; - if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) - return false; - } - - // Okay, all uses of IV by PN are in predecessor blocks that really are - // dominated by the latch block. Use the post-incremented value. - return true; + // Nothing else is interesting here. + return false; } /// AddUsersIfInteresting - Inspect the specified instruction. If it is a @@ -196,16 +110,9 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { const SCEV *ISE = SE->getSCEV(I); if (isa<SCEVCouldNotCompute>(ISE)) return false; - // Get the start and stride for this expression. - Loop *UseLoop = LI->getLoopFor(I->getParent()); - const SCEV *Start = SE->getIntegerSCEV(0, ISE->getType()); - const SCEV *Stride = Start; - - if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) - return false; // Non-reducible symbolic expression, bail out. 
- - // Keep things simple. Don't touch loop-variant strides. - if (!Stride->isLoopInvariant(L) && L->contains(I)) + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. + if (!isInteresting(ISE, I, L)) return false; SmallPtrSet<Instruction *, 4> UniqueUsers; @@ -241,27 +148,24 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { } if (AddUserToIVUsers) { - // Okay, we found a user that we cannot reduce. Analyze the instruction - // and decide what to do with it. If we are a use inside of the loop, use - // the value before incrementation, otherwise use it after incrementation. - if (IVUseShouldUsePostIncValue(User, I, L, DT)) { - // The value used will be incremented by the stride more than we are - // expecting, so subtract this off. - const SCEV *NewStart = SE->getMinusSCEV(Start, Stride); - IVUses.push_back(new IVStrideUse(this, Stride, NewStart, User, I)); - IVUses.back().setIsUseOfPostIncrementedValue(true); - DEBUG(dbgs() << " USING POSTINC SCEV, START=" << *NewStart<< "\n"); - } else { - IVUses.push_back(new IVStrideUse(this, Stride, Start, User, I)); - } + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, ISE, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Transform the expression into a normalized form. 
+ NewUse.Expr = + TransformForPostIncUse(NormalizeAutodetect, NewUse.Expr, + User, I, + NewUse.PostIncLoops, + *SE, *DT); + DEBUG(dbgs() << " NORMALIZED TO: " << *NewUse.Expr << '\n'); } } return true; } -IVStrideUse &IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset, +IVStrideUse &IVUsers::AddUser(const SCEV *Expr, Instruction *User, Value *Operand) { - IVUses.push_back(new IVStrideUse(this, Stride, Offset, User, Operand)); + IVUses.push_back(new IVStrideUse(this, Expr, User, Operand)); return IVUses.back(); } @@ -295,30 +199,10 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const { - // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); - // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); - // Add the offset in a separate step, because it may be loop-variant. - RetVal = SE->getAddExpr(RetVal, U.getOffset()); - // For uses of post-incremented values, add an extra stride to compute - // the actual replacement value. - if (U.isUseOfPostIncrementedValue()) - RetVal = SE->getAddExpr(RetVal, U.getStride()); - return RetVal; -} - -/// getCanonicalExpr - Return a SCEV expression which computes the -/// value of the SCEV of the given IVStrideUse, ignoring the -/// isUseOfPostIncrementedValue flag. -const SCEV *IVUsers::getCanonicalExpr(const IVStrideUse &U) const { - // Start with zero. - const SCEV *RetVal = SE->getIntegerSCEV(0, U.getStride()->getType()); - // Create the basic add recurrence. - RetVal = SE->getAddRecExpr(RetVal, U.getStride(), L); - // Add the offset in a separate step, because it may be loop-variant. 
- RetVal = SE->getAddExpr(RetVal, U.getOffset()); - return RetVal; + PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(U.PostIncLoops); + return TransformForPostIncUse(Denormalize, U.getExpr(), + U.getUser(), U.getOperandValToReplace(), + Loops, *SE, *DT); } void IVUsers::print(raw_ostream &OS, const Module *M) const { @@ -339,8 +223,13 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const { WriteAsOperand(OS, UI->getOperandValToReplace(), false); OS << " = " << *getReplacementExpr(*UI); - if (UI->isUseOfPostIncrementedValue()) - OS << " (post-inc)"; + for (PostIncLoopSet::const_iterator + I = UI->PostIncLoops.begin(), + E = UI->PostIncLoops.end(); I != E; ++I) { + OS << " (post-inc with loop "; + WriteAsOperand(OS, (*I)->getHeader(), false); + OS << ")"; + } OS << " in "; UI->getUser()->print(OS, &Annotator); OS << '\n'; @@ -356,6 +245,39 @@ void IVUsers::releaseMemory() { IVUses.clear(); } +static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + if (AR->getLoop() == L) + return AR; + return findAddRecForLoop(AR->getStart(), L); + } + + if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) + if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) + return AR; + return 0; + } + + return 0; +} + +const SCEV *IVStrideUse::getStride(const Loop *L) const { + if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(), L)) + return AR->getStepRecurrence(*Parent->SE); + return 0; +} + +void IVStrideUse::transformToPostInc(const Loop *L) { + PostIncLoopSet Loops; + Loops.insert(L); + Expr = TransformForPostIncUse(Normalize, Expr, + getUser(), getOperandValToReplace(), + Loops, *Parent->SE, *Parent->DT); + PostIncLoops.insert(L); +} + void IVStrideUse::deleted() { // Remove this user from the list. 
Parent->IVUses.erase(this); diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 2e18ceac525..dd8ab431f34 100644 --- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -966,9 +966,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Determine a normalized form of this expression, which is the expression // before any post-inc adjustment is made. const SCEVAddRecExpr *Normalized = S; - if (L == PostIncLoop) { - const SCEV *Step = S->getStepRecurrence(SE); - Normalized = cast<SCEVAddRecExpr>(SE.getMinusSCEV(S, Step)); + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. @@ -1002,7 +1005,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Accommodate post-inc mode, if necessary. Value *Result; - if (L != PostIncLoop) + if (!PostIncLoops.count(L)) Result = PN; else { // In PostInc mode, use the post-incremented value. @@ -1274,7 +1277,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. - if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) + if (L && S->hasComputableLoopEvolution(L) && !PostIncLoops.count(L)) InsertPt = L->getHeader()->getFirstNonPHI(); while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); @@ -1296,7 +1299,7 @@ Value *SCEVExpander::expand(const SCEV *S) { Value *V = visit(S); // Remember the expanded value for this SCEV at this location. 
- if (!PostIncLoop) + if (PostIncLoops.empty()) InsertedExpressions[std::make_pair(S, InsertPt)] = V; restoreInsertPoint(SaveInsertBB, SaveInsertPt); @@ -1304,7 +1307,7 @@ Value *SCEVExpander::expand(const SCEV *S) { } void SCEVExpander::rememberInstruction(Value *I) { - if (!PostIncLoop) + if (PostIncLoops.empty()) InsertedValues.insert(I); // If we just claimed an existing instruction and that instruction had diff --git a/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp new file mode 100644 index 00000000000..75c381d5efa --- /dev/null +++ b/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -0,0 +1,150 @@ +//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for working with "normalized" expressions. +// See the comments at the top of ScalarEvolutionNormalization.h for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +using namespace llvm; + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). 
+static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast<PHINode>(User); + if (!PN) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of IV by the PHI node. If any use corresponds to + // a block that is not dominated by the latch block, give up and use the + // preincremented value. + unsigned NumUses = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == IV) { + ++NumUses; + if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + } + + // Okay, all uses of IV by PN are in predecessor blocks that really are + // dominated by the latch block. Use the post-incremented value. 
+ return true; +} + +const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, + const SCEV *S, + Instruction *User, + Value *OperandValToReplace, + PostIncLoopSet &Loops, + ScalarEvolution &SE, + DominatorTree &DT) { + if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S)) + return S; + if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) { + const SCEV *O = X->getOperand(); + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + if (O != N) + switch (S->getSCEVType()) { + case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType()); + case scSignExtend: return SE.getSignExtendExpr(N, S->getType()); + case scTruncate: return SE.getTruncateExpr(N, S->getType()); + default: llvm_unreachable("Unexpected SCEVCastExpr kind!"); + } + return S; + } + if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) { + SmallVector<const SCEV *, 8> Operands; + bool Changed = false; + for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end(); + I != E; ++I) { + const SCEV *O = *I; + const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace, + Loops, SE, DT); + Changed |= N != O; + Operands.push_back(N); + } + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + // An addrec. This is the interesting part. 
+ const Loop *L = AR->getLoop(); + const SCEV *Result = SE.getAddRecExpr(Operands, L); + switch (Kind) { + default: llvm_unreachable("Unexpected transform name!"); + case NormalizeAutodetect: + if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace)) + if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) { + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + Loops.insert(L); + } + break; + case Normalize: + if (Loops.count(L)) + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + break; + case Denormalize: + if (Loops.count(L)) { + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getAddExpr(Result, TransformedStep); + } + break; + } + return Result; + } + if (Changed) + switch (S->getSCEVType()) { + case scAddExpr: return SE.getAddExpr(Operands); + case scMulExpr: return SE.getMulExpr(Operands); + case scSMaxExpr: return SE.getSMaxExpr(Operands); + case scUMaxExpr: return SE.getUMaxExpr(Operands); + default: llvm_unreachable("Unexpected SCEVNAryExpr kind!"); + } + return S; + } + if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) { + const SCEV *LO = X->getLHS(); + const SCEV *RO = X->getRHS(); + const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace, + Loops, SE, DT); + const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace, + Loops, SE, DT); + if (LO != LN || RO != RN) + return SE.getUDivExpr(LN, RN); + return S; + } + llvm_unreachable("Unexpected SCEV kind!"); + return 0; +} diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 6605666e45d..1a58b6644fc 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -454,6 +454,46 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; } +// FIXME: It is an extremely bad idea to indvar substitute anything more 
+// complex than affine induction variables. Doing so will put expensive +// polynomial evaluations inside of the loop, and the str reduction pass +// currently can only reduce affine polynomials. For now just disable +// indvar subst on anything more complex than an affine addrec, unless +// it can be expanded to a trivial value. +static bool isSafe(const SCEV *S, const Loop *L) { + // Loop-invariant values are safe. + if (S->isLoopInvariant(L)) return true; + + // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how + // to transform them into efficient code. + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) + return AR->isAffine(); + + // An add is safe it all its operands are safe. + if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) { + for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(), + E = Commutative->op_end(); I != E; ++I) + if (!isSafe(*I, L)) return false; + return true; + } + + // A cast is safe if its operand is. + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) + return isSafe(C->getOperand(), L); + + // A udiv is safe if its operands are. + if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S)) + return isSafe(UD->getLHS(), L) && + isSafe(UD->getRHS(), L); + + // SCEVUnknown is always safe. + if (isa<SCEVUnknown>(S)) + return true; + + // Nothing else is safe. + return false; +} + void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { SmallVector<WeakVH, 16> DeadInsts; @@ -465,7 +505,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // the need for the code evaluation methods to insert induction variables // of different sizes. 
for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) { - const SCEV *Stride = UI->getStride(); Value *Op = UI->getOperandValToReplace(); const Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); @@ -486,7 +525,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // currently can only reduce affine polynomials. For now just disable // indvar subst on anything more complex than an affine addrec, unless // it can be expanded to a trivial value. - if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L)) + if (!isSafe(AR, L)) continue; // Determine the insertion point for this user. By default, insert diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 625a75d6cca..631092b3260 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -781,10 +781,10 @@ struct LSRFixup { /// will be replaced. Value *OperandValToReplace; - /// PostIncLoop - If this user is to use the post-incremented value of an + /// PostIncLoops - If this user is to use the post-incremented value of an /// induction variable, this variable is non-null and holds the loop /// associated with the induction variable. - const Loop *PostIncLoop; + PostIncLoopSet PostIncLoops; /// LUIdx - The index of the LSRUse describing the expression which /// this fixup needs, minus an offset (below). @@ -795,6 +795,8 @@ struct LSRFixup { /// offsets, for example in an unrolled loop. int64_t Offset; + bool isUseFullyOutsideLoop(const Loop *L) const; + LSRFixup(); void print(raw_ostream &OS) const; @@ -804,9 +806,24 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), PostIncLoop(0), + : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} +/// isUseFullyOutsideLoop - Test whether this fixup always uses its +/// value outside of the given loop. 
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { + // PHI nodes use their value in their incoming blocks. + if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) { + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == OperandValToReplace && + L->contains(PN->getIncomingBlock(i))) + return false; + return true; + } + + return !L->contains(UserInst); +} + void LSRFixup::print(raw_ostream &OS) const { OS << "UserInst="; // Store is common and interesting enough to be worth special-casing. @@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", OperandValToReplace="; WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); - if (PostIncLoop) { + for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(), + E = PostIncLoops.end(); I != E; ++I) { OS << ", PostIncLoop="; - WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false); + WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false); } if (LUIdx != ~size_t(0)) @@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() { !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { // Conservatively assume there may be reuse if the quotient of their // strides could be a legal scale. - const SCEV *A = CondUse->getStride(); - const SCEV *B = UI->getStride(); + const SCEV *A = CondUse->getStride(L); + const SCEV *B = UI->getStride(L); + if (!A || !B) continue; if (SE.getTypeSizeInBits(A->getType()) != SE.getTypeSizeInBits(B->getType())) { if (SE.getTypeSizeInBits(A->getType()) > @@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() { ExitingBlock->getInstList().insert(TermBr, Cond); // Clone the IVUse, as the old use still exists! 
- CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(), + CondUse = &IU.AddUser(CondUse->getExpr(), Cond, CondUse->getOperandValToReplace()); TermBr->replaceUsesOfWith(OldCond, Cond); } @@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() { // If we get to here, we know that we can transform the setcc instruction to // use the post-incremented version of the IV, allowing us to coalesce the // live ranges for the IV correctly. - CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(), - CondUse->getStride())); - CondUse->setIsUseOfPostIncrementedValue(true); + CondUse->transformToPostInc(L); Changed = true; PostIncs.insert(Cond); @@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() { SmallSetVector<const SCEV *, 4> Strides; // Collect interesting types and strides. + SmallVector<const SCEV *, 4> Worklist; for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { - const SCEV *Stride = UI->getStride(); + const SCEV *Expr = UI->getExpr(); // Collect interesting types. - Types.insert(SE.getEffectiveSCEVType(Stride->getType())); - - // Add the stride for this loop. - Strides.insert(Stride); - - // Add strides for other mentioned loops. - for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset()); - AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart())) - Strides.insert(AR->getStepRecurrence(SE)); + Types.insert(SE.getEffectiveSCEVType(Expr->getType())); + + // Add strides for mentioned loops. + Worklist.push_back(Expr); + do { + const SCEV *S = Worklist.pop_back_val(); + if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { + Strides.insert(AR->getStepRecurrence(SE)); + Worklist.push_back(AR->getStart()); + } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { + Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); + } + } while (!Worklist.empty()); } // Compute interesting factors from the set of interesting strides. 
@@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = UI->getUser(); LF.OperandValToReplace = UI->getOperandValToReplace(); - if (UI->isUseOfPostIncrementedValue()) - LF.PostIncLoop = L; + LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; const Type *AccessTy = 0; @@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { AccessTy = getAccessType(LF.UserInst); } - const SCEV *S = IU.getCanonicalExpr(*UI); + const SCEV *S = UI->getExpr(); // Equality (== and !=) ICmps are special. We can rewrite (i == N) as // (N - i == 0), and this allows (N - i) to be the expression that we work @@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; - LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst); + LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); // If this is the first use of this LSRUse, give it a formula. if (LU.Formulae.empty()) { @@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; - LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst); + LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); InsertSupplementalFormula(U, LU, LF.LUIdx); CountRegisters(LU.Formulae.back(), Uses.size() - 1); break; @@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, SmallVectorImpl<WeakVH> &DeadInsts) const { const LSRUse &LU = Uses[LF.LUIdx]; - // Then, collect some instructions which we will remain dominated by when - // expanding the replacement. These must be dominated by any operands that + // Then, collect some instructions which must be dominated by the + // expanding replacement. These must be dominated by any operands that // will be required in the expansion. 
SmallVector<Instruction *, 4> Inputs; if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) @@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (Instruction *I = dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) Inputs.push_back(I); - if (LF.PostIncLoop) { - if (!L->contains(LF.UserInst)) + if (LF.PostIncLoops.count(L)) { + if (LF.isUseFullyOutsideLoop(L)) Inputs.push_back(L->getLoopLatch()->getTerminator()); else Inputs.push_back(IVIncInsertPos); @@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. - Rewriter.setPostInc(LF.PostIncLoop); + Rewriter.setPostInc(LF.PostIncLoops); // This is the type that the user actually needs. const Type *OpTy = LF.OperandValToReplace->getType(); @@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const SCEV *Reg = *I; assert(!Reg->isZero() && "Zero allocated in a base register!"); - // If we're expanding for a post-inc user for the add-rec's loop, make the - // post-inc adjustment. - const SCEV *Start = Reg; - while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) { - if (AR->getLoop() == LF.PostIncLoop) { - Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE)); - // If the user is inside the loop, insert the code after the increment - // so that it is dominated by its operand. If the original insert point - // was already dominated by the increment, keep it, because there may - // be loop-variant operands that need to be respected also. - if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) { - IP = IVIncInsertPos; - while (isa<DbgInfoIntrinsic>(IP)) ++IP; - } - break; - } - Start = AR->getStart(); - } + // If we're expanding for a post-inc user, make the post-inc adjustment. 
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); + Reg = TransformForPostIncUse(Denormalize, Reg, + LF.UserInst, LF.OperandValToReplace, + Loops, SE, DT); Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); } @@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (F.AM.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; - // If we're expanding for a post-inc user for the add-rec's loop, make the - // post-inc adjustment. - if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS)) - if (AR->getLoop() == LF.PostIncLoop) - ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE)); + // If we're expanding for a post-inc user, make the post-inc adjustment. + PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); + ScaledS = TransformForPostIncUse(Denormalize, ScaledS, + LF.UserInst, LF.OperandValToReplace, + Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { // An interesting way of "folding" with an icmp is to use a negated @@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); // We're done expanding now, so reset the rewriter. - Rewriter.setPostInc(0); + Rewriter.clearPostInc(); // An ICmpZero Formula represents an ICmp which we're handling as a // comparison against zero. Now that we've expanded an expression for that diff --git a/llvm/test/CodeGen/X86/multiple-loop-post-inc.ll b/llvm/test/CodeGen/X86/multiple-loop-post-inc.ll new file mode 100644 index 00000000000..5feab18579d --- /dev/null +++ b/llvm/test/CodeGen/X86/multiple-loop-post-inc.ll @@ -0,0 +1,277 @@ +; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s +; rdar://7236213 + +; CodeGen shouldn't require any lea instructions inside the marked loop. +; It should properly set up post-increment uses and do coalescing for +; the induction variables. 
+ +; CHECK: # Start +; CHECK-NOT: lea +; CHECK: # Stop + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +define void @foo(float* %I, i64 %IS, float* nocapture %Start, float* nocapture %Step, float* %O, i64 %OS, i64 %N) nounwind { +entry: + %times4 = alloca float, align 4 ; <float*> [#uses=3] + %timesN = alloca float, align 4 ; <float*> [#uses=2] + %0 = load float* %Step, align 4 ; <float> [#uses=8] + %1 = ptrtoint float* %I to i64 ; <i64> [#uses=1] + %2 = ptrtoint float* %O to i64 ; <i64> [#uses=1] + %tmp = xor i64 %2, %1 ; <i64> [#uses=1] + %tmp16 = and i64 %tmp, 15 ; <i64> [#uses=1] + %3 = icmp eq i64 %tmp16, 0 ; <i1> [#uses=1] + %4 = trunc i64 %IS to i32 ; <i32> [#uses=1] + %5 = xor i32 %4, 1 ; <i32> [#uses=1] + %6 = trunc i64 %OS to i32 ; <i32> [#uses=1] + %7 = xor i32 %6, 1 ; <i32> [#uses=1] + %8 = or i32 %7, %5 ; <i32> [#uses=1] + %9 = icmp eq i32 %8, 0 ; <i1> [#uses=1] + br i1 %9, label %bb, label %return + +bb: ; preds = %entry + %10 = load float* %Start, align 4 ; <float> [#uses=1] + br label %bb2 + +bb1: ; preds = %bb3 + %11 = load float* %I_addr.0, align 4 ; <float> [#uses=1] + %12 = fmul float %11, %x.0 ; <float> [#uses=1] + store float %12, float* %O_addr.0, align 4 + %13 = fadd float %x.0, %0 ; <float> [#uses=1] + %indvar.next53 = add i64 %14, 1 ; <i64> [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %bb + %14 = phi i64 [ %indvar.next53, %bb1 ], [ 0, %bb ] ; <i64> [#uses=21] + %x.0 = phi float [ %13, %bb1 ], [ %10, %bb ] ; <float> [#uses=6] + %N_addr.0 = sub i64 %N, %14 ; <i64> [#uses=4] + %O_addr.0 = getelementptr float* %O, i64 %14 ; <float*> [#uses=4] + %I_addr.0 = getelementptr float* %I, i64 %14 ; <float*> [#uses=3] + %15 = icmp slt i64 %N_addr.0, 1 ; <i1> [#uses=1] + br i1 %15, label %bb4, label %bb3 + +bb3: ; preds = %bb2 + %16 = ptrtoint float* %O_addr.0 to i64 ; <i64> [#uses=1] + %17 = and i64 %16, 15 ; <i64> [#uses=1] + 
%18 = icmp eq i64 %17, 0 ; <i1> [#uses=1] + br i1 %18, label %bb4, label %bb1 + +bb4: ; preds = %bb3, %bb2 + %19 = fmul float %0, 4.000000e+00 ; <float> [#uses=1] + store float %19, float* %times4, align 4 + %20 = fmul float %0, 1.600000e+01 ; <float> [#uses=1] + store float %20, float* %timesN, align 4 + %21 = fmul float %0, 0.000000e+00 ; <float> [#uses=1] + %22 = fadd float %21, %x.0 ; <float> [#uses=1] + %23 = fadd float %x.0, %0 ; <float> [#uses=1] + %24 = fmul float %0, 2.000000e+00 ; <float> [#uses=1] + %25 = fadd float %24, %x.0 ; <float> [#uses=1] + %26 = fmul float %0, 3.000000e+00 ; <float> [#uses=1] + %27 = fadd float %26, %x.0 ; <float> [#uses=1] + %28 = insertelement <4 x float> undef, float %22, i32 0 ; <<4 x float>> [#uses=1] + %29 = insertelement <4 x float> %28, float %23, i32 1 ; <<4 x float>> [#uses=1] + %30 = insertelement <4 x float> %29, float %25, i32 2 ; <<4 x float>> [#uses=1] + %31 = insertelement <4 x float> %30, float %27, i32 3 ; <<4 x float>> [#uses=5] + %asmtmp.i = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=3] + %32 = fadd <4 x float> %31, %asmtmp.i ; <<4 x float>> [#uses=3] + %33 = fadd <4 x float> %32, %asmtmp.i ; <<4 x float>> [#uses=3] + %34 = fadd <4 x float> %33, %asmtmp.i ; <<4 x float>> [#uses=2] + %asmtmp.i18 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %timesN) nounwind ; <<4 x float>> [#uses=8] + %35 = icmp sgt i64 %N_addr.0, 15 ; <i1> [#uses=2] + br i1 %3, label %bb6.preheader, label %bb8 + +bb6.preheader: ; preds = %bb4 + br i1 %35, label %bb.nph43, label %bb7 + +bb.nph43: ; preds = %bb6.preheader + %tmp108 = add i64 %14, 16 ; <i64> [#uses=1] + %tmp111 = add i64 %14, 4 ; <i64> [#uses=1] + %tmp115 = add i64 %14, 8 ; <i64> [#uses=1] + %tmp119 = add i64 %14, 12 ; <i64> [#uses=1] + %tmp134 = add i64 %N, -16 ; <i64> [#uses=1] + %tmp135 = sub i64 %tmp134, %14 ; <i64> 
[#uses=1] + call void asm sideeffect "# Start.", "~{dirflag},~{fpsr},~{flags}"() nounwind + br label %bb5 + +bb5: ; preds = %bb.nph43, %bb5 + %indvar102 = phi i64 [ 0, %bb.nph43 ], [ %indvar.next103, %bb5 ] ; <i64> [#uses=3] + %vX3.041 = phi <4 x float> [ %34, %bb.nph43 ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2] + %vX0.039 = phi <4 x float> [ %31, %bb.nph43 ], [ %41, %bb5 ] ; <<4 x float>> [#uses=2] + %vX2.037 = phi <4 x float> [ %33, %bb.nph43 ], [ %46, %bb5 ] ; <<4 x float>> [#uses=2] + %vX1.036 = phi <4 x float> [ %32, %bb.nph43 ], [ %47, %bb5 ] ; <<4 x float>> [#uses=2] + %tmp104 = shl i64 %indvar102, 4 ; <i64> [#uses=5] + %tmp105 = add i64 %14, %tmp104 ; <i64> [#uses=2] + %scevgep106 = getelementptr float* %I, i64 %tmp105 ; <float*> [#uses=1] + %scevgep106107 = bitcast float* %scevgep106 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp109 = add i64 %tmp108, %tmp104 ; <i64> [#uses=2] + %tmp112 = add i64 %tmp111, %tmp104 ; <i64> [#uses=2] + %scevgep113 = getelementptr float* %I, i64 %tmp112 ; <float*> [#uses=1] + %scevgep113114 = bitcast float* %scevgep113 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp116 = add i64 %tmp115, %tmp104 ; <i64> [#uses=2] + %scevgep117 = getelementptr float* %I, i64 %tmp116 ; <float*> [#uses=1] + %scevgep117118 = bitcast float* %scevgep117 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp120 = add i64 %tmp119, %tmp104 ; <i64> [#uses=2] + %scevgep121 = getelementptr float* %I, i64 %tmp120 ; <float*> [#uses=1] + %scevgep121122 = bitcast float* %scevgep121 to <4 x float>* ; <<4 x float>*> [#uses=1] + %scevgep123 = getelementptr float* %O, i64 %tmp105 ; <float*> [#uses=1] + %scevgep123124 = bitcast float* %scevgep123 to <4 x float>* ; <<4 x float>*> [#uses=1] + %scevgep126 = getelementptr float* %O, i64 %tmp112 ; <float*> [#uses=1] + %scevgep126127 = bitcast float* %scevgep126 to <4 x float>* ; <<4 x float>*> [#uses=1] + %scevgep128 = getelementptr float* %O, i64 %tmp116 ; <float*> [#uses=1] + %scevgep128129 = bitcast float* 
%scevgep128 to <4 x float>* ; <<4 x float>*> [#uses=1] + %scevgep130 = getelementptr float* %O, i64 %tmp120 ; <float*> [#uses=1] + %scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp132 = mul i64 %indvar102, -16 ; <i64> [#uses=1] + %tmp136 = add i64 %tmp135, %tmp132 ; <i64> [#uses=2] + %36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1] + %37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1] + %38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1] + %39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1] + %40 = fmul <4 x float> %36, %vX0.039 ; <<4 x float>> [#uses=1] + %41 = fadd <4 x float> %vX0.039, %asmtmp.i18 ; <<4 x float>> [#uses=2] + %42 = fmul <4 x float> %37, %vX1.036 ; <<4 x float>> [#uses=1] + %43 = fmul <4 x float> %38, %vX2.037 ; <<4 x float>> [#uses=1] + %44 = fmul <4 x float> %39, %vX3.041 ; <<4 x float>> [#uses=1] + store <4 x float> %40, <4 x float>* %scevgep123124, align 16 + store <4 x float> %42, <4 x float>* %scevgep126127, align 16 + store <4 x float> %43, <4 x float>* %scevgep128129, align 16 + store <4 x float> %44, <4 x float>* %scevgep130131, align 16 + %45 = fadd <4 x float> %vX3.041, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %46 = fadd <4 x float> %vX2.037, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %47 = fadd <4 x float> %vX1.036, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %48 = icmp sgt i64 %tmp136, 15 ; <i1> [#uses=1] + %indvar.next103 = add i64 %indvar102, 1 ; <i64> [#uses=1] + br i1 %48, label %bb5, label %bb6.bb7_crit_edge + +bb6.bb7_crit_edge: ; preds = %bb5 + call void asm sideeffect "# Stop.", "~{dirflag},~{fpsr},~{flags}"() nounwind + %scevgep110 = getelementptr float* %I, i64 %tmp109 ; <float*> [#uses=1] + %scevgep125 = getelementptr float* %O, i64 %tmp109 ; <float*> [#uses=1] + br label %bb7 + +bb7: ; preds = %bb6.bb7_crit_edge, %bb6.preheader + %I_addr.1.lcssa = phi float* [ %scevgep110, %bb6.bb7_crit_edge ], 
[ %I_addr.0, %bb6.preheader ] ; <float*> [#uses=1] + %O_addr.1.lcssa = phi float* [ %scevgep125, %bb6.bb7_crit_edge ], [ %O_addr.0, %bb6.preheader ] ; <float*> [#uses=1] + %vX0.0.lcssa = phi <4 x float> [ %41, %bb6.bb7_crit_edge ], [ %31, %bb6.preheader ] ; <<4 x float>> [#uses=1] + %N_addr.1.lcssa = phi i64 [ %tmp136, %bb6.bb7_crit_edge ], [ %N_addr.0, %bb6.preheader ] ; <i64> [#uses=1] + %asmtmp.i17 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %times4) nounwind ; <<4 x float>> [#uses=0] + br label %bb11 + +bb8: ; preds = %bb4 + br i1 %35, label %bb.nph, label %bb11 + +bb.nph: ; preds = %bb8 + %I_addr.0.sum = add i64 %14, -1 ; <i64> [#uses=1] + %49 = getelementptr inbounds float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1] + %50 = bitcast float* %49 to <4 x float>* ; <<4 x float>*> [#uses=1] + %51 = load <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1] + %tmp54 = add i64 %14, 16 ; <i64> [#uses=1] + %tmp56 = add i64 %14, 3 ; <i64> [#uses=1] + %tmp60 = add i64 %14, 7 ; <i64> [#uses=1] + %tmp64 = add i64 %14, 11 ; <i64> [#uses=1] + %tmp68 = add i64 %14, 15 ; <i64> [#uses=1] + %tmp76 = add i64 %14, 4 ; <i64> [#uses=1] + %tmp80 = add i64 %14, 8 ; <i64> [#uses=1] + %tmp84 = add i64 %14, 12 ; <i64> [#uses=1] + %tmp90 = add i64 %N, -16 ; <i64> [#uses=1] + %tmp91 = sub i64 %tmp90, %14 ; <i64> [#uses=1] + br label %bb9 + +bb9: ; preds = %bb.nph, %bb9 + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb9 ] ; <i64> [#uses=3] + %vX3.125 = phi <4 x float> [ %34, %bb.nph ], [ %69, %bb9 ] ; <<4 x float>> [#uses=2] + %vX0.223 = phi <4 x float> [ %31, %bb.nph ], [ %65, %bb9 ] ; <<4 x float>> [#uses=2] + %vX2.121 = phi <4 x float> [ %33, %bb.nph ], [ %70, %bb9 ] ; <<4 x float>> [#uses=2] + %vX1.120 = phi <4 x float> [ %32, %bb.nph ], [ %71, %bb9 ] ; <<4 x float>> [#uses=2] + %vI0.019 = phi <4 x float> [ %51, %bb.nph ], [ %55, %bb9 ] ; <<4 x float>> [#uses=1] + %tmp51 = shl i64 %indvar, 4 ; <i64> [#uses=9] + %tmp55 = 
add i64 %tmp54, %tmp51 ; <i64> [#uses=2] + %tmp57 = add i64 %tmp56, %tmp51 ; <i64> [#uses=1] + %scevgep58 = getelementptr float* %I, i64 %tmp57 ; <float*> [#uses=1] + %scevgep5859 = bitcast float* %scevgep58 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp61 = add i64 %tmp60, %tmp51 ; <i64> [#uses=1] + %scevgep62 = getelementptr float* %I, i64 %tmp61 ; <float*> [#uses=1] + %scevgep6263 = bitcast float* %scevgep62 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp65 = add i64 %tmp64, %tmp51 ; <i64> [#uses=1] + %scevgep66 = getelementptr float* %I, i64 %tmp65 ; <float*> [#uses=1] + %scevgep6667 = bitcast float* %scevgep66 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp69 = add i64 %tmp68, %tmp51 ; <i64> [#uses=1] + %scevgep70 = getelementptr float* %I, i64 %tmp69 ; <float*> [#uses=1] + %scevgep7071 = bitcast float* %scevgep70 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp72 = add i64 %14, %tmp51 ; <i64> [#uses=1] + %scevgep73 = getelementptr float* %O, i64 %tmp72 ; <float*> [#uses=1] + %scevgep7374 = bitcast float* %scevgep73 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp77 = add i64 %tmp76, %tmp51 ; <i64> [#uses=1] + %scevgep78 = getelementptr float* %O, i64 %tmp77 ; <float*> [#uses=1] + %scevgep7879 = bitcast float* %scevgep78 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp81 = add i64 %tmp80, %tmp51 ; <i64> [#uses=1] + %scevgep82 = getelementptr float* %O, i64 %tmp81 ; <float*> [#uses=1] + %scevgep8283 = bitcast float* %scevgep82 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp85 = add i64 %tmp84, %tmp51 ; <i64> [#uses=1] + %scevgep86 = getelementptr float* %O, i64 %tmp85 ; <float*> [#uses=1] + %scevgep8687 = bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1] + %tmp88 = mul i64 %indvar, -16 ; <i64> [#uses=1] + %tmp92 = add i64 %tmp91, %tmp88 ; <i64> [#uses=2] + %52 = load <4 x float>* %scevgep5859, align 16 ; <<4 x float>> [#uses=2] + %53 = load <4 x float>* %scevgep6263, align 16 ; <<4 x float>> [#uses=2] + %54 = load <4 x float>* 
%scevgep6667, align 16 ; <<4 x float>> [#uses=2] + %55 = load <4 x float>* %scevgep7071, align 16 ; <<4 x float>> [#uses=2] + %56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %59 = shufflevector <4 x float> %58, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %60 = shufflevector <4 x float> %53, <4 x float> %54, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %61 = shufflevector <4 x float> %60, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %62 = shufflevector <4 x float> %54, <4 x float> %55, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] + %63 = shufflevector <4 x float> %62, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1] + %64 = fmul <4 x float> %57, %vX0.223 ; <<4 x float>> [#uses=1] + %65 = fadd <4 x float> %vX0.223, %asmtmp.i18 ; <<4 x float>> [#uses=2] + %66 = fmul <4 x float> %59, %vX1.120 ; <<4 x float>> [#uses=1] + %67 = fmul <4 x float> %61, %vX2.121 ; <<4 x float>> [#uses=1] + %68 = fmul <4 x float> %63, %vX3.125 ; <<4 x float>> [#uses=1] + store <4 x float> %64, <4 x float>* %scevgep7374, align 16 + store <4 x float> %66, <4 x float>* %scevgep7879, align 16 + store <4 x float> %67, <4 x float>* %scevgep8283, align 16 + store <4 x float> %68, <4 x float>* %scevgep8687, align 16 + %69 = fadd <4 x float> %vX3.125, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %70 = fadd <4 x float> %vX2.121, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %71 = fadd <4 x float> %vX1.120, %asmtmp.i18 ; <<4 x float>> [#uses=1] + %72 = icmp sgt i64 %tmp92, 15 ; <i1> [#uses=1] + %indvar.next = add i64 %indvar, 1 ; <i64> 
[#uses=1] + br i1 %72, label %bb9, label %bb10.bb11.loopexit_crit_edge + +bb10.bb11.loopexit_crit_edge: ; preds = %bb9 + %scevgep = getelementptr float* %I, i64 %tmp55 ; <float*> [#uses=1] + %scevgep75 = getelementptr float* %O, i64 %tmp55 ; <float*> [#uses=1] + br label %bb11 + +bb11: ; preds = %bb8, %bb10.bb11.loopexit_crit_edge, %bb7 + %N_addr.2 = phi i64 [ %N_addr.1.lcssa, %bb7 ], [ %tmp92, %bb10.bb11.loopexit_crit_edge ], [ %N_addr.0, %bb8 ] ; <i64> [#uses=2] + %vX0.1 = phi <4 x float> [ %vX0.0.lcssa, %bb7 ], [ %65, %bb10.bb11.loopexit_crit_edge ], [ %31, %bb8 ] ; <<4 x float>> [#uses=1] + %O_addr.2 = phi float* [ %O_addr.1.lcssa, %bb7 ], [ %scevgep75, %bb10.bb11.loopexit_crit_edge ], [ %O_addr.0, %bb8 ] ; <float*> [#uses=1] + %I_addr.2 = phi float* [ %I_addr.1.lcssa, %bb7 ], [ %scevgep, %bb10.bb11.loopexit_crit_edge ], [ %I_addr.0, %bb8 ] ; <float*> [#uses=1] + %73 = extractelement <4 x float> %vX0.1, i32 0 ; <float> [#uses=2] + %74 = icmp sgt i64 %N_addr.2, 0 ; <i1> [#uses=1] + br i1 %74, label %bb12, label %bb14 + +bb12: ; preds = %bb11, %bb12 + %indvar94 = phi i64 [ %indvar.next95, %bb12 ], [ 0, %bb11 ] ; <i64> [#uses=3] + %x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ; <float> [#uses=2] + %I_addr.433 = getelementptr float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1] + %O_addr.432 = getelementptr float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1] + %75 = load float* %I_addr.433, align 4 ; <float> [#uses=1] + %76 = fmul float %75, %x.130 ; <float> [#uses=1] + store float %76, float* %O_addr.432, align 4 + %77 = fadd float %x.130, %0 ; <float> [#uses=2] + %indvar.next95 = add i64 %indvar94, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next95, %N_addr.2 ; <i1> [#uses=1] + br i1 %exitcond, label %bb14, label %bb12 + +bb14: ; preds = %bb12, %bb11 + %x.1.lcssa = phi float [ %73, %bb11 ], [ %77, %bb12 ] ; <float> [#uses=1] + store float %x.1.lcssa, float* %Start, align 4 + ret void + +return: ; preds = %entry + ret void +} diff --git 
a/llvm/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/llvm/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll index 8959c177409..59f14fcd1ce 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc)} +; RUN: opt < %s -analyze -iv-users | grep {\{1,+,3,+,2\}<%loop> (post-inc with loop %loop)} ; The value of %r is dependent on a polynomial iteration expression. |