diff options
| author | Max Kazantsev <max.kazantsev@azul.com> | 2018-09-04 05:01:35 +0000 |
|---|---|---|
| committer | Max Kazantsev <max.kazantsev@azul.com> | 2018-09-04 05:01:35 +0000 |
| commit | 2cbba5633753552a984572c8b9a5997e5c96496d (patch) | |
| tree | a900bb79706f535da925a223e42953de9104d578 | |
| parent | bd203e03f89a0664c22c7b9ca74fd1b2fdf6f0c0 (diff) | |
| download | bcm5719-llvm-2cbba5633753552a984572c8b9a5997e5c96496d.tar.gz bcm5719-llvm-2cbba5633753552a984572c8b9a5997e5c96496d.zip | |
[IndVars] Fix usage of SCEVExpander to not mess with SCEVConstant. PR38674
This patch removes the function `expandSCEVIfNeeded` which behaves not as
it was intended. This function tries to make a lookup for exact existing expansion
and only goes to normal expansion via `expandCodeFor` if this lookup hasn't found
anything. As a result of this, if some instruction above the loop has a `SCEVConstant`
SCEV, this logic will return this instruction when asked for this `SCEVConstant` rather
than return a constant value. This is both non-profitable and in some cases leads to
breach of LCSSA form (as in PR38674).
Whether or not it is possible to break LCSSA with this algorithm and with some
non-constant SCEVs is still in question, this is still being investigated. I wasn't
able to construct such a test so far, so maybe this situation is impossible. If it is,
it will go as a separate fix.
Rather than do it, it is always correct to just invoke `expandCodeFor` unconditionally:
it behaves smarter about insertion points, and as side effect of this it will choose a
constant value for SCEVConstants. For other SCEVs it may end up finding a better insertion
point. So it should not be worse in any case.
NOTE: So far the only known case for which this transform may break LCSSA is mapping
of SCEVConstant to an instruction. However there is a suspicion that the entire algorithm
can compromise LCSSA form for other cases as well (yet not proved).
Differential Revision: https://reviews.llvm.org/D51286
Reviewed By: etherzhhb
llvm-svn: 341345
| -rw-r--r-- | llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 19 | ||||
| -rw-r--r-- | llvm/test/Transforms/IndVarSimplify/pr38674.ll | 141 |
2 files changed, 142 insertions, 18 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index e2c2ff2e230..372ea61c159 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -152,9 +152,6 @@ class IndVarSimplify { void sinkUnusedInvariants(Loop *L); - Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, - Instruction *InsertPt, Type *Ty); - public: IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, const DataLayout &DL, TargetLibraryInfo *TLI, @@ -521,19 +518,6 @@ struct RewritePhi { } // end anonymous namespace -Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, - Loop *L, Instruction *InsertPt, - Type *ResultTy) { - // Before expanding S into an expensive LLVM expression, see if we can use an - // already existing value as the expansion for S. - if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L)) - if (ExistingValue->getType() == ResultTy) - return ExistingValue; - - // We didn't find anything, fall back to using SCEVExpander. - return Rewriter.expandCodeFor(S, ResultTy, InsertPt); -} - //===----------------------------------------------------------------------===// // rewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. @@ -650,8 +634,7 @@ void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { } bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); - Value *ExitVal = - expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType()); + Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' diff --git a/llvm/test/Transforms/IndVarSimplify/pr38674.ll b/llvm/test/Transforms/IndVarSimplify/pr38674.ll new file mode 100644 index 00000000000..78d59a372f2 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/pr38674.ll @@ -0,0 +1,141 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -indvars < %s | FileCheck %s + +; Check that we don't reuse %zext instead of %inc11 for LCSSA Phi node. Case +; with constants SCEV. + +define i32 @test_01() { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: br label [[FOR_COND4_PREHEADER:%.*]] +; CHECK: for.cond4.preheader: +; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 1 to i32 +; CHECK-NEXT: br label [[FOR_BODY6:%.*]] +; CHECK: for.cond4: +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC:%.*]], 2 +; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END:%.*]] +; CHECK: for.body6: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC]], [[FOR_COND4:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]] +; CHECK: for.end: +; CHECK-NEXT: br i1 false, label [[FOR_COND4_PREHEADER]], label [[FOR_END9:%.*]] +; CHECK: for.end9: +; CHECK-NEXT: br i1 false, label [[FOR_COND1_PREHEADER]], label [[RETURN_LOOPEXIT3:%.*]] +; CHECK: return.loopexit: +; CHECK-NEXT: unreachable +; CHECK: return.loopexit3: +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: return: +; CHECK-NEXT: ret i32 1 +; +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.end9, %entry + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.end, %for.cond1.preheader + %zext = zext i16 1 to i32 + br label %for.body6 + +for.cond4: ; preds = %for.body6 + %cmp5 = icmp ult i32 %inc, 2 + br i1 %cmp5, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4, %for.cond4.preheader + %iv = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.cond4 ] + %0 = icmp eq i32 %iv, %zext + %inc = add nuw nsw i32 %iv, 1 + br i1 %0, label %return.loopexit, label %for.cond4 + +for.end: ; preds = %for.cond4 + br i1 false, label %for.cond4.preheader, label %for.end9 + +for.end9: ; preds = %for.end + %inc11 = add nuw nsw i32 0, 1 + br i1 false, label %for.cond1.preheader, label %return.loopexit3 + +return.loopexit: ; preds = %for.body6 + unreachable + +return.loopexit3: ; preds = %for.end9 + %inc11.lcssa = phi i32 [ %inc11, %for.end9 ] + br label %return + +return: ; preds = %return.loopexit3 + ret i32 %inc11.lcssa +} + +; Same as test_01, but the instructions with the same SCEV have a non-constant +; SCEV. +define i32 @test_02(i32 %x) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: br label [[FOR_COND4_PREHEADER:%.*]] +; CHECK: for.cond4.preheader: +; CHECK-NEXT: [[ZEXT:%.*]] = mul i32 [[X:%.*]], 1 +; CHECK-NEXT: br label [[FOR_BODY6:%.*]] +; CHECK: for.cond4: +; CHECK-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC:%.*]], 2 +; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END:%.*]] +; CHECK: for.body6: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[FOR_COND4_PREHEADER]] ], [ [[INC]], [[FOR_COND4:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], [[ZEXT]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: br i1 [[TMP0]], label [[RETURN_LOOPEXIT:%.*]], label [[FOR_COND4]] +; CHECK: for.end: +; CHECK-NEXT: br i1 false, label [[FOR_COND4_PREHEADER]], label [[FOR_END9:%.*]] +; CHECK: for.end9: +; CHECK-NEXT: [[INC11:%.*]] = add nuw nsw i32 0, [[X]] +; CHECK-NEXT: br i1 false, label [[FOR_COND1_PREHEADER]], label [[RETURN_LOOPEXIT3:%.*]] +; CHECK: return.loopexit: +; CHECK-NEXT: unreachable +; CHECK: return.loopexit3: +; CHECK-NEXT: [[INC11_LCSSA:%.*]] = phi i32 [ [[INC11]], [[FOR_END9]] ] +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: return: +; CHECK-NEXT: ret i32 [[INC11_LCSSA]] +; +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.end9, %entry + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.end, %for.cond1.preheader + %zext = mul i32 %x, 1 + br label %for.body6 + +for.cond4: ; preds = %for.body6 + %cmp5 = icmp ult i32 %inc, 2 + br i1 %cmp5, label %for.body6, label %for.end + +for.body6: ; preds = %for.cond4, %for.cond4.preheader + %iv = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.cond4 ] + %0 = icmp eq i32 %iv, %zext + %inc = add nuw nsw i32 %iv, 1 + br i1 %0, label %return.loopexit, label %for.cond4 + +for.end: ; preds = %for.cond4 + br i1 false, label %for.cond4.preheader, label %for.end9 + +for.end9: ; preds = %for.end + %inc11 = add nuw nsw i32 0, %x + br i1 false, label %for.cond1.preheader, label %return.loopexit3 + +return.loopexit: ; preds = %for.body6 + unreachable + +return.loopexit3: ; preds = %for.end9 + %inc11.lcssa = phi i32 [ %inc11, %for.end9 ] + br label %return + +return: ; preds = %return.loopexit3 + ret i32 %inc11.lcssa +} |

