diff options
| author | Florian Hahn <florian.hahn@arm.com> | 2018-09-26 19:34:25 +0000 | 
|---|---|---|
| committer | Florian Hahn <florian.hahn@arm.com> | 2018-09-26 19:34:25 +0000 | 
| commit | 6feb637124e5bf5a3bb45c1b2106d622b93e7c28 (patch) | |
| tree | ca308325099cf164a112bd309bfdf4bd7d41e347 /llvm | |
| parent | 79c88c31056fc52f8414c497f2d6e2e53749c48b (diff) | |
| download | bcm5719-llvm-6feb637124e5bf5a3bb45c1b2106d622b93e7c28.tar.gz bcm5719-llvm-6feb637124e5bf5a3bb45c1b2106d622b93e7c28.zip  | |
[LoopInterchange] Preserve LCSSA.
This patch extends LoopInterchange to move LCSSA PHIs to the right place
after interchanging. This is required for LoopInterchange to become a
function pass.
As an alternative to manually moving the PHIs, we could also re-form
the LCSSA PHIs for a set of interchanged loops, but that's more
expensive.
Reviewers: efriedma, mcrosier, davide
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D52154
llvm-svn: 343132
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 72 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopInterchange/interchangeable.ll | 2 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopInterchange/lcssa.ll | 204 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopInterchange/phi-ordering.ll | 2 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopInterchange/reductions.ll | 48 | 
5 files changed, 252 insertions, 76 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 38c9396b954..3be41646741 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -411,8 +411,6 @@ private:    bool adjustLoopLinks();    void adjustLoopPreheaders();    bool adjustLoopBranches(); -  void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, -                           BasicBlock *NewPred);    Loop *OuterLoop;    Loop *InnerLoop; @@ -455,6 +453,7 @@ struct LoopInterchange : public FunctionPass {      AU.addPreserved<DominatorTreeWrapperPass>();      AU.addPreserved<LoopInfoWrapperPass>();      AU.addPreserved<ScalarEvolutionWrapperPass>(); +    AU.addPreservedID(LCSSAID);    }    bool runOnFunction(Function &F) override { @@ -1297,9 +1296,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {                  FromBB->getTerminator()->getIterator());  } -void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, -                                                   BasicBlock *OldPred, -                                                   BasicBlock *NewPred) { +static void updateIncomingBlock(BasicBlock *CurrBlock, BasicBlock *OldPred, +                                BasicBlock *NewPred) {    for (PHINode &PHI : CurrBlock->phis()) {      unsigned Num = PHI.getNumIncomingValues();      for (unsigned i = 0; i < Num; ++i) { @@ -1330,6 +1328,52 @@ static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,    }  } +// Move Lcssa PHIs to the right place. 
+static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerLatch, +                          BasicBlock *OuterLatch) { +  SmallVector<PHINode *, 8> LcssaInnerExit; +  for (PHINode &P : InnerExit->phis()) +    LcssaInnerExit.push_back(&P); + +  SmallVector<PHINode *, 8> LcssaInnerLatch; +  for (PHINode &P : InnerLatch->phis()) +    LcssaInnerLatch.push_back(&P); + +  // Lcssa PHIs for values used outside the inner loop are in InnerExit. +  // If a PHI node has users outside of InnerExit, it has a use outside the +  // interchanged loop and we have to preserve it. We move these to +  // InnerLatch, which will become the new exit block for the innermost +  // loop after interchanging. For PHIs only used in InnerExit, we can just +  // replace them with the incoming value. +  for (PHINode *P : LcssaInnerExit) { +    bool hasUsersOutside = false; +    for (auto UI = P->use_begin(), E = P->use_end(); UI != E;) { +      Use &U = *UI; +      ++UI; +      auto *Usr = cast<Instruction>(U.getUser()); +      if (Usr->getParent() != InnerExit) { +        hasUsersOutside = true; +        continue; +      } +      U.set(P->getIncomingValueForBlock(InnerLatch)); +    } +    if (hasUsersOutside) +      P->moveBefore(InnerLatch->getFirstNonPHI()); +    else +      P->eraseFromParent(); +  } + +  // If the inner loop latch contains LCSSA PHIs, those come from a child loop +  // and we have to move them to the new inner latch. +  for (PHINode *P : LcssaInnerLatch) +    P->moveBefore(InnerExit->getFirstNonPHI()); + +  // Now adjust the incoming blocks for the LCSSA PHIs. +  // For PHIs moved from Inner's exit block, we need to replace Inner's latch +  // with the new latch. 
+  updateIncomingBlock(InnerLatch, InnerLatch, OuterLatch); +} +  bool LoopInterchangeTransform::adjustLoopBranches() {    LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");    std::vector<DominatorTree::UpdateType> DTUpdates; @@ -1409,17 +1453,6 @@ bool LoopInterchangeTransform::adjustLoopBranches() {    updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,                    InnerLoopLatchSuccessor, DTUpdates); -  // Adjust PHI nodes in InnerLoopLatchSuccessor. Update all uses of PHI with -  // the value and remove this PHI node from inner loop. -  SmallVector<PHINode *, 8> LcssaVec; -  for (PHINode &P : InnerLoopLatchSuccessor->phis()) -    LcssaVec.push_back(&P); - -  for (PHINode *P : LcssaVec) { -    Value *Incoming = P->getIncomingValueForBlock(InnerLoopLatch); -    P->replaceAllUsesWith(Incoming); -    P->eraseFromParent(); -  }    if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)      OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); @@ -1431,12 +1464,15 @@ bool LoopInterchangeTransform::adjustLoopBranches() {    updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,                    DTUpdates); -  updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); -    DT->applyUpdates(DTUpdates);    restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,                     OuterLoopPreHeader); +  moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopLatch, OuterLoopLatch); +  // For PHIs in the exit block of the outer loop, outer's latch has been +  // replaced by Inners'. +  updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); +    // Now update the reduction PHIs in the inner and outer loop headers.    
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;    for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1)) diff --git a/llvm/test/Transforms/LoopInterchange/interchangeable.ll b/llvm/test/Transforms/LoopInterchange/interchangeable.ll index 44985d8a79c..a97981c5128 100644 --- a/llvm/test/Transforms/LoopInterchange/interchangeable.ll +++ b/llvm/test/Transforms/LoopInterchange/interchangeable.ll @@ -1,5 +1,5 @@  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -S | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"  target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopInterchange/lcssa.ll b/llvm/test/Transforms/LoopInterchange/lcssa.ll index b44c2404464..8886cf4925f 100644 --- a/llvm/test/Transforms/LoopInterchange/lcssa.ll +++ b/llvm/test/Transforms/LoopInterchange/lcssa.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t +; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t  ; RUN: cat %t |  FileCheck --check-prefix REMARK %s  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -17,20 +17,20 @@ target triple = "x86_64-unknown-linux-gnu"  ; REMARK: UnsupportedExitPHI  ; REMARK-NEXT: lcssa_01 -define void @lcssa_01(){ +define void @lcssa_01() {  entry:    %cmp21 = icmp sgt i64 100, 1    br i1 %cmp21, label %outer.ph, label %for.end16 -outer.ph: +outer.ph:                                         ; preds = %entry    %cmp218 = icmp sgt i64 100, 1    br label %outer.header -outer.header: -  %iv.outer= phi i64 [ 1, %outer.ph ], [ %iv.outer.next, %outer.inc ] +outer.header:                                     ; 
preds = %outer.inc, %outer.ph +  %iv.outer = phi i64 [ 1, %outer.ph ], [ %iv.outer.next, %outer.inc ]    br i1 %cmp218, label %for.body3, label %outer.inc -for.body3: +for.body3:                                        ; preds = %for.body3, %outer.header    %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]    %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer    %vA = load i32, i32* %arrayidx5 @@ -42,35 +42,36 @@ for.body3:    %exitcond = icmp eq i64 %iv.inner.next, 100    br i1 %exitcond, label %outer.inc, label %for.body3 -outer.inc: +outer.inc:                                        ; preds = %for.body3, %outer.header    %iv.outer.next = add nsw i64 %iv.outer, 1    %cmp = icmp eq i64 %iv.outer.next, 100    br i1 %cmp, label %outer.header, label %for.exit -for.exit: -  store i64 %iv.outer.next, i64 * @Y +for.exit:                                         ; preds = %outer.inc +  %iv.outer.next.lcssa = phi i64 [ %iv.outer.next, %outer.inc ] +  store i64 %iv.outer.next.lcssa, i64* @Y    br label %for.end16 -for.end16: +for.end16:                                        ; preds = %for.exit, %entry    ret void  }  ; REMARK: UnsupportedExitPHI  ; REMARK-NEXT: lcssa_02 -define void @lcssa_02(){ +define void @lcssa_02() {  entry:    %cmp21 = icmp sgt i64 100, 1    br i1 %cmp21, label %outer.ph, label %for.end16 -outer.ph: +outer.ph:                                         ; preds = %entry    %cmp218 = icmp sgt i64 100, 1    br label %outer.header -outer.header: -  %iv.outer= phi i64 [ 1, %outer.ph ], [ %iv.outer.next, %outer.inc ] +outer.header:                                     ; preds = %outer.inc, %outer.ph +  %iv.outer = phi i64 [ 1, %outer.ph ], [ %iv.outer.next, %outer.inc ]    br i1 %cmp218, label %for.body3, label %outer.inc -for.body3: +for.body3:                                        ; preds = %for.body3, %outer.header    %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], 
[ 1, %outer.header ]    %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer    %vA = load i32, i32* %arrayidx5 @@ -82,32 +83,32 @@ for.body3:    %exitcond = icmp eq i64 %iv.inner.next, 100    br i1 %exitcond, label %outer.inc, label %for.body3 -outer.inc: +outer.inc:                                        ; preds = %for.body3, %outer.header    %iv.inner.end = phi i64 [ 0, %outer.header ], [ %iv.inner.next, %for.body3 ]    %iv.outer.next = add nsw i64 %iv.outer, 1    %cmp = icmp eq i64 %iv.outer.next, 100    br i1 %cmp, label %outer.header, label %for.exit -for.exit: -  store i64 %iv.inner.end, i64 * @Y +for.exit:                                         ; preds = %outer.inc +  %iv.inner.end.lcssa = phi i64 [ %iv.inner.end, %outer.inc ] +  store i64 %iv.inner.end.lcssa, i64* @Y    br label %for.end16 -for.end16: +for.end16:                                        ; preds = %for.exit, %entry    ret void  } -  ; REMARK: Interchanged  ; REMARK-NEXT: lcssa_03 -define void @lcssa_03(){ +define void @lcssa_03() {  entry:    br label %outer.header -outer.header: -  %iv.outer= phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] +outer.header:                                     ; preds = %outer.inc, %entry +  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ]    br label %for.body3 -for.body3: +for.body3:                                        ; preds = %for.body3, %outer.header    %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]    %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer    %vA = load i32, i32* %arrayidx5 @@ -119,16 +120,18 @@ for.body3:    %exitcond = icmp eq i64 %iv.inner.next, 100    br i1 %exitcond, label %outer.inc, label %for.body3 -outer.inc: +outer.inc:                                        ; preds = %for.body3 +  %iv.inner.lcssa = phi i64 [ %iv.inner, %for.body3 ]    %iv.outer.next = 
add nsw i64 %iv.outer, 1    %cmp = icmp eq i64 %iv.outer.next, 100    br i1 %cmp, label %outer.header, label %for.exit -for.exit: -  store i64 %iv.inner, i64 * @Y +for.exit:                                         ; preds = %outer.inc +  %iv.inner.lcssa.lcssa = phi i64 [ %iv.inner.lcssa, %outer.inc ] +  store i64 %iv.inner.lcssa.lcssa, i64* @Y    br label %for.end16 -for.end16: +for.end16:                                        ; preds = %for.exit    ret void  } @@ -136,16 +139,17 @@ for.end16:  ;        types, as we fail to detect floating point reductions for now.  ; REMARK: UnsupportedPHIOuter  ; REMARK-NEXT: lcssa_04 -define void @lcssa_04(){ + +define void @lcssa_04() {  entry:    br label %outer.header -outer.header: -  %iv.outer= phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] -  %float.outer= phi float [ 1.0, %entry ], [ 2.0, %outer.inc ] +outer.header:                                     ; preds = %outer.inc, %entry +  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] +  %float.outer = phi float [ 1.000000e+00, %entry ], [ 2.000000e+00, %outer.inc ]    br label %for.body3 -for.body3: +for.body3:                                        ; preds = %for.body3, %outer.header    %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ]    %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer    %vA = load i32, i32* %arrayidx5 @@ -157,15 +161,141 @@ for.body3:    %exitcond = icmp eq i64 %iv.inner.next, 100    br i1 %exitcond, label %outer.inc, label %for.body3 -outer.inc: +outer.inc:                                        ; preds = %for.body3 +  %iv.outer.next = add nsw i64 %iv.outer, 1 +  %cmp = icmp eq i64 %iv.outer.next, 100 +  br i1 %cmp, label %outer.header, label %for.exit + +for.exit:                                         ; preds = %outer.inc +  %float.outer.lcssa = phi float [ %float.outer, %outer.inc ] +  store float %float.outer.lcssa, float* @F +  br 
label %for.end16 + +for.end16:                                        ; preds = %for.exit +  ret void +} + +; PHI node in inner latch with multiple predecessors. +; REMARK: Interchanged +; REMARK-NEXT: lcssa_05 + +define void @lcssa_05(i32* %ptr) { +entry: +  br label %outer.header + +outer.header:                                     ; preds = %outer.inc, %entry +  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] +  br label %for.body3 + +for.body3:                                        ; preds = %bb3, %outer.header +  %iv.inner = phi i64 [ %iv.inner.next, %bb3 ], [ 1, %outer.header ] +  br i1 undef, label %bb2, label %bb3 + +bb2:                                              ; preds = %for.body3 +  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer +  %vA = load i32, i32* %arrayidx5 +  %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer +  %vC = load i32, i32* %arrayidx9 +  %add = add nsw i32 %vA, %vC +  br label %bb3 + +bb3:                                              ; preds = %bb2, %for.body3 +  %addp = phi i32 [ %add, %bb2 ], [ 0, %for.body3 ] +  store i32 %addp, i32* %ptr +  %iv.inner.next = add nuw nsw i64 %iv.inner, 1 +  %exitcond = icmp eq i64 %iv.inner.next, 100 +  br i1 %exitcond, label %outer.inc, label %for.body3 + +outer.inc:                                        ; preds = %bb3 +  %iv.inner.lcssa = phi i64 [ %iv.inner, %bb3 ] +  %iv.outer.next = add nsw i64 %iv.outer, 1 +  %cmp = icmp eq i64 %iv.outer.next, 100 +  br i1 %cmp, label %outer.header, label %for.exit + +for.exit:                                         ; preds = %outer.inc +  %iv.inner.lcssa.lcssa = phi i64 [ %iv.inner.lcssa, %outer.inc ] +  store i64 %iv.inner.lcssa.lcssa, i64* @Y +  br label %for.end16 + +for.end16:                                        ; preds = %for.exit +  ret void +} + +; REMARK: UnsupportedExitPHI +; REMARK-NEXT: 
lcssa_06 + +define void @lcssa_06(i64* %ptr, i32* %ptr1) { +entry: +  br label %outer.header + +outer.header:                                     ; preds = %outer.inc, %entry +  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] +  br i1 undef, label %for.body3, label %outer.inc + +for.body3:                                        ; preds = %for.body3, %outer.header +  %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, %outer.header ] +  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer +  %vA = load i32, i32* %arrayidx5 +  %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer +  %vC = load i32, i32* %arrayidx9 +  %add = add nsw i32 %vA, %vC +  store i32 %add, i32* %ptr1 +  %iv.inner.next = add nuw nsw i64 %iv.inner, 1 +  %exitcond = icmp eq i64 %iv.inner.next, 100 +  br i1 %exitcond, label %outer.inc, label %for.body3 + +outer.inc:                                        ; preds = %for.body3, %outer.header +  %sv = phi i64 [ 0, %outer.header ], [ 1, %for.body3 ] +  store i64 %sv, i64* %ptr +  %iv.outer.next = add nsw i64 %iv.outer, 1 +  %cmp = icmp eq i64 %iv.outer.next, 100 +  br i1 %cmp, label %outer.header, label %for.exit + +for.exit:                                         ; preds = %outer.inc +  %sv.lcssa = phi i64 [ %sv, %outer.inc ] +  store i64 %sv.lcssa, i64* @Y +  br label %for.end16 + +for.end16:                                        ; preds = %for.exit +  ret void +} + +; REMARK: Interchanged +; REMARK-NEXT: lcssa_07 +define void @lcssa_07() { +entry: +  br label %outer.header + +outer.header:                                     ; preds = %outer.inc, %entry +  %iv.outer = phi i64 [ 1, %entry ], [ %iv.outer.next, %outer.inc ] +  br label %for.body3 + +for.body3:                                        ; preds = %for.body3, %outer.header +  %iv.inner = phi i64 [ %iv.inner.next, %for.body3 ], [ 1, 
%outer.header ] +  %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %iv.inner, i64 %iv.outer +  %vA = load i32, i32* %arrayidx5 +  %arrayidx9 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @C, i64 0, i64 %iv.inner, i64 %iv.outer +  %vC = load i32, i32* %arrayidx9 +  %add = add nsw i32 %vA, %vC +  store i32 %add, i32* %arrayidx5 +  %iv.inner.next = add nuw nsw i64 %iv.inner, 1 +  %exitcond = icmp eq i64 %iv.inner.next, 100 +  br i1 %exitcond, label %outer.bb, label %for.body3 + +outer.bb:                                         ; preds = %for.body3 +  %iv.inner.lcssa = phi i64 [ %iv.inner, %for.body3 ] +  br label %outer.inc + +outer.inc:                                        ; preds = %outer.bb    %iv.outer.next = add nsw i64 %iv.outer, 1    %cmp = icmp eq i64 %iv.outer.next, 100    br i1 %cmp, label %outer.header, label %for.exit -for.exit: -  store float %float.outer, float* @F +for.exit:                                         ; preds = %outer.inc +  %iv.inner.lcssa.lcssa = phi i64 [ %iv.inner.lcssa, %outer.inc ] +  store i64 %iv.inner.lcssa.lcssa, i64* @Y    br label %for.end16 -for.end16: +for.end16:                                        ; preds = %for.exit    ret void  } diff --git a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll index 05c21129bab..c7416973758 100644 --- a/llvm/test/Transforms/LoopInterchange/phi-ordering.ll +++ b/llvm/test/Transforms/LoopInterchange/phi-ordering.ll @@ -1,5 +1,5 @@  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -S 2>&1 | FileCheck %s +; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S 2>&1 | FileCheck %s  ;; Checks the order of the inner phi nodes does not cause havoc.  ;; The inner loop has a reduction into c. The IV is not the first phi. 
diff --git a/llvm/test/Transforms/LoopInterchange/reductions.ll b/llvm/test/Transforms/LoopInterchange/reductions.ll index da92276feb4..28a2d8d6a66 100644 --- a/llvm/test/Transforms/LoopInterchange/reductions.ll +++ b/llvm/test/Transforms/LoopInterchange/reductions.ll @@ -1,5 +1,5 @@  ; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -S -debug 2>&1 | FileCheck %s +; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S -debug 2>&1 | FileCheck %s  @A = common global [500 x [500 x i32]] zeroinitializer  @X = common global i32 0 @@ -18,7 +18,7 @@ entry:    %cmp16 = icmp sgt i32 %N, 1    br i1 %cmp16, label %for.body3.lr.ph, label %for.end8 -for.body3.lr.ph:                                  ; preds = %entry, %for.cond1.for.inc6_crit_edge +for.body3.lr.ph:                                  ; preds = %for.cond1.for.inc6_crit_edge, %entry    %indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ]    %X.promoted = load i32, i32* @X    br label %for.body3 @@ -35,7 +35,8 @@ for.body3:                                        ; preds = %for.body3, %for.bod    br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body3  for.cond1.for.inc6_crit_edge:                     ; preds = %for.body3 -  store i32 %add, i32* @X +  %add.lcssa = phi i32 [ %add, %for.body3 ] +  store i32 %add.lcssa, i32* @X    %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1    %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32    %exitcond21 = icmp eq i32 %lftr.wideiv20, %N @@ -56,12 +57,12 @@ for.end8:                                         ; preds = %for.cond1.for.inc6_  ;; Loop is interchanged check that the phi nodes are split and the promoted value is used instead of the reduction phi.  ; CHECK: Loops interchanged. 
-define void @reduction_02(i32 %N)  { +define void @reduction_02(i32 %N) {  entry:    %cmp34 = icmp sgt i32 %N, 1    br i1 %cmp34, label %for.cond4.preheader.preheader, label %for.end19 -for.cond4.preheader.preheader:                    ; preds = %entry, %for.inc17 +for.cond4.preheader.preheader:                    ; preds = %for.inc17, %entry    %indvars.iv40 = phi i64 [ %indvars.iv.next41, %for.inc17 ], [ 1, %entry ]    br label %for.body6.lr.ph @@ -87,20 +88,25 @@ for.body6:                                        ; preds = %for.body6, %for.bod    br i1 %exitcond, label %for.cond4.for.inc14_crit_edge, label %for.body6  for.cond4.for.inc14_crit_edge:                    ; preds = %for.body6 -  store i32 %add, i32* @X -  store i32 %add13, i32* @Y +  %add.lcssa = phi i32 [ %add, %for.body6 ] +  %add13.lcssa = phi i32 [ %add13, %for.body6 ] +  store i32 %add.lcssa, i32* @X +  store i32 %add13.lcssa, i32* @Y    %indvars.iv.next37 = add nuw nsw i64 %indvars.iv36, 1    %lftr.wideiv38 = trunc i64 %indvars.iv.next37 to i32    %exitcond39 = icmp eq i32 %lftr.wideiv38, %N    br i1 %exitcond39, label %for.inc17, label %for.body6.lr.ph  for.inc17:                                        ; preds = %for.cond4.for.inc14_crit_edge +  %add.lcssa.lcssa = phi i32 [ %add.lcssa, %for.cond4.for.inc14_crit_edge ]    %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1    %lftr.wideiv42 = trunc i64 %indvars.iv.next41 to i32    %exitcond43 = icmp eq i32 %lftr.wideiv42, %N    br i1 %exitcond43, label %for.end19, label %for.cond4.preheader.preheader  for.end19:                                        ; preds = %for.inc17, %entry +  %res1 = phi i32 [ 0, %entry ], [ %add.lcssa.lcssa, %for.inc17 ] +  store i32 %res1, i32* @X    ret void  } @@ -117,17 +123,17 @@ for.end19:                                        ; preds = %for.inc17, %entry  ;; Not interchanged hence the phi's in the inner loop will not be split.  ; CHECK: Outer loops with reductions are not supported currently. 
-define void @reduction_03(i32 %N)  { +define void @reduction_03(i32 %N) {  entry:    %cmp35 = icmp sgt i32 %N, 1    br i1 %cmp35, label %for.cond4.preheader.lr.ph, label %for.end19 -for.cond4.preheader.lr.ph:                        ; preds = %entry, %for.cond1.for.inc17_crit_edge +for.cond4.preheader.lr.ph:                        ; preds = %for.cond1.for.inc17_crit_edge, %entry    %indvars.iv41 = phi i64 [ %indvars.iv.next42, %for.cond1.for.inc17_crit_edge ], [ 1, %entry ]    %Y.promoted = load i32, i32* @Y    br label %for.body6.lr.ph -for.body6.lr.ph:                                  ; preds = %for.cond4.preheader.lr.ph, %for.cond4.for.end_crit_edge +for.body6.lr.ph:                                  ; preds = %for.cond4.for.end_crit_edge, %for.cond4.preheader.lr.ph    %indvars.iv37 = phi i64 [ 1, %for.cond4.preheader.lr.ph ], [ %indvars.iv.next38, %for.cond4.for.end_crit_edge ]    %add1334 = phi i32 [ %Y.promoted, %for.cond4.preheader.lr.ph ], [ %add13, %for.cond4.for.end_crit_edge ]    %X.promoted = load i32, i32* @X @@ -145,7 +151,8 @@ for.body6:                                        ; preds = %for.body6, %for.bod    br i1 %exitcond, label %for.cond4.for.end_crit_edge, label %for.body6  for.cond4.for.end_crit_edge:                      ; preds = %for.body6 -  store i32 %add, i32* @X +  %add.lcssa = phi i32 [ %add, %for.body6 ] +  store i32 %add.lcssa, i32* @X    %arrayidx12 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @B, i64 0, i64 %indvars.iv37, i64 %indvars.iv41    %1 = load i32, i32* %arrayidx12    %add13 = add nsw i32 %add1334, %1 @@ -155,7 +162,8 @@ for.cond4.for.end_crit_edge:                      ; preds = %for.body6    br i1 %exitcond40, label %for.cond1.for.inc17_crit_edge, label %for.body6.lr.ph  for.cond1.for.inc17_crit_edge:                    ; preds = %for.cond4.for.end_crit_edge -  store i32 %add13, i32* @Y +  %add13.lcssa = phi i32 [ %add13, %for.cond4.for.end_crit_edge ] +  store i32 %add13.lcssa, i32* @Y    
%indvars.iv.next42 = add nuw nsw i64 %indvars.iv41, 1    %lftr.wideiv43 = trunc i64 %indvars.iv.next42 to i32    %exitcond44 = icmp eq i32 %lftr.wideiv43, %N @@ -181,7 +189,7 @@ entry:    %cmp28 = icmp sgt i32 %N, 1    br i1 %cmp28, label %for.cond4.preheader.preheader, label %for.end15 -for.cond4.preheader.preheader:                    ; preds = %entry, %for.inc13 +for.cond4.preheader.preheader:                    ; preds = %for.inc13, %entry    %i.029 = phi i32 [ %inc14, %for.inc13 ], [ 1, %entry ]    br label %for.body6.lr.ph @@ -205,8 +213,10 @@ for.body6:                                        ; preds = %for.body6, %for.bod    br i1 %exitcond, label %for.cond4.for.inc10_crit_edge, label %for.body6  for.cond4.for.inc10_crit_edge:                    ; preds = %for.body6 -  store i32 %add, i32* @X -  store i32 %add9, i32* @Y +  %add.lcssa = phi i32 [ %add, %for.body6 ] +  %add9.lcssa = phi i32 [ %add9, %for.body6 ] +  store i32 %add.lcssa, i32* @X +  store i32 %add9.lcssa, i32* @Y    %indvars.iv.next31 = add nuw nsw i64 %indvars.iv30, 1    %lftr.wideiv32 = trunc i64 %indvars.iv.next31 to i32    %exitcond33 = icmp eq i32 %lftr.wideiv32, %N @@ -231,7 +241,7 @@ entry:    %cmp16 = icmp sgt i32 %N, 1    br i1 %cmp16, label %for.body7.lr.ph, label %for.end8 -for.body7.lr.ph:                                  ; preds = %entry, %for.cond1.for.inc6_crit_edge +for.body7.lr.ph:                                  ; preds = %for.cond1.for.inc6_crit_edge, %entry    %indvars.iv18 = phi i64 [ %indvars.iv.next19, %for.cond1.for.inc6_crit_edge ], [ 1, %entry ]    %X.promoted = load i32, i32* @X    br label %for.body7 @@ -248,15 +258,15 @@ for.body7:                                        ; preds = %for.body7, %for.bod    br i1 %exitcond, label %for.cond1.for.inc6_crit_edge, label %for.body7  for.cond1.for.inc6_crit_edge:                     ; preds = %for.body7 -  store i32 %add, i32* @X +  %add.lcssa = phi i32 [ %add, %for.body7 ] +  store i32 %add.lcssa, i32* @X    
%indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1    %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32    %exitcond21 = icmp eq i32 %lftr.wideiv20, %N    br i1 %exitcond21, label %for.end8, label %for.body7.lr.ph  for.end8:                                         ; preds = %for.cond1.for.inc6_crit_edge, %entry -  %add.res = phi i32 [ %add, %for.cond1.for.inc6_crit_edge], [ 0, %entry ] +  %add.res = phi i32 [ %add.lcssa, %for.cond1.for.inc6_crit_edge ], [ 0, %entry ]    store i32 %add.res, i32* @Y -    ret void  }  | 

