[LoopInterchange] Support reductions across inner and outer loop.

This patch adds logic to detect reductions across the inner and outer loop by following the incoming values of PHI nodes in the outer loop. If the incoming values take part in a reduction in the inner loop or come from outside the outer loop, we found a reduction spanning across inner and outer loop. With this change, ~10% more loops are interchanged in the LLVM test-suite + SPEC2006. Fixes https://bugs.llvm.org/show_bug.cgi?id=30472 Reviewers: mcrosier, efriedma, karthikthecool, davide, hfinkel, dmgreen Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D43245 llvm-svn: 346438
author: Florian Hahn <florian.hahn@arm.com> 2018-11-08 20:44:19 +0000
committer: Florian Hahn <florian.hahn@arm.com> 2018-11-08 20:44:19 +0000
commit: a684a99441367b8e0dda222178156e827e93e3c2 (patch)
tree: fae758e619c9ce2a8cf2205873034dddd0f1f4f5 /llvm/test/Transforms
parent: 8cca8bd4aa9a4ade721495f66f9d2bd2afefe452 (diff)
download: bcm5719-llvm-a684a99441367b8e0dda222178156e827e93e3c2.tar.gz
bcm5719-llvm-a684a99441367b8e0dda222178156e827e93e3c2.zip
4 files changed, 205 insertions, 3 deletions
diff --git a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
index 74543fb1647..7c77b10dd8e 100644
--- a/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
+++ b/llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
-; RUN:     -verify-dom-info -verify-loop-info 2>&1 | FileCheck -check-prefix=IR %s
+; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
 ; RUN: FileCheck --input-file=%t %s
 
 ; Inner loop only reductions are not supported currently. See discussion at
diff --git a/llvm/test/Transforms/LoopInterchange/lcssa.ll b/llvm/test/Transforms/LoopInterchange/lcssa.ll
index 2bd9ee69c16..edc9800ecd7 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t
-; RUN: cat %t |  FileCheck --check-prefix REMARK %s
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -verify-loop-lcssa -pass-remarks-output=%t -S
+; RUN: FileCheck --input-file %t --check-prefix REMARK %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll b/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
new file mode 100644
index 00000000000..c58e7d3d272
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/outer-only-reductions.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 | FileCheck -check-prefix=IR %s
+; RUN: FileCheck --input-file=%t %s
+
+; Outer loop only reductions are not supported currently.
+
+@A = common global [500 x [500 x i32]] zeroinitializer
+
+;; global X
+
+;;  for( int i=1;i<N;i++) {
+;;    for( int j=1;j<N;j++)
+;;      ;
+;;    X+=A[j][i];
+;;  }
+
+; CHECK: --- !Missed
+; CHECK-NEXT: Pass:            loop-interchange
+; CHECK-NEXT: Name:            UnsupportedPHI
+; CHECK-NEXT: Function:        reduction_01
+
+; IR-LABEL: @reduction_01(
+; IR-NOT: split
+
+define i32 @reduction_01(i32 %N) {
+entry:
+  br label %outer.header
+
+outer.header:                                  ; preds = %for.cond1.for.inc6_crit_edge, %entry
+  %indvars.iv18 = phi i64 [ %indvars.iv.next19, %outer.inc ], [ 1, %entry ]
+  %add15 = phi i32 [ 0, %entry ], [ %add, %outer.inc ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
+  %indvars.iv = phi i64 [ 1, %outer.header ], [ %indvars.iv.next, %for.body3 ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %N
+  br i1 %exitcond, label %outer.inc, label %for.body3
+
+outer.inc:                     ; preds = %for.body3
+  %arrayidx5 = getelementptr inbounds [500 x [500 x i32]], [500 x [500 x i32]]* @A, i64 0, i64 %indvars.iv, i64 %indvars.iv18
+  %0 = load i32, i32* %arrayidx5
+  %add = add nsw i32 %add15, %0
+  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
+  %lftr.wideiv20 = trunc i64 %indvars.iv.next19 to i32
+  %exitcond21 = icmp eq i32 %lftr.wideiv20, %N
+  br i1 %exitcond21, label %for.end8, label %outer.header
+
+for.end8:                                         ; preds = %for.cond1.for.inc6_crit_edge, %entry
+  ret i32 %add
+}
diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
new file mode 100644
index 00000000000..7ac96f14825
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -0,0 +1,150 @@
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \
+; RUN:     -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; REMARKS: --- !Passed
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            Interchanged
+; REMARKS-NEXT: Function:        test1
+
+define i64 @test1([100 x [100 x i64]]* %Arr) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR2_PREHEADER:%.*]]
+; CHECK:       for1.header.preheader:
+; CHECK-NEXT:    br label [[FOR1_HEADER:%.*]]
+; CHECK:       for1.header:
+; CHECK-NEXT:    [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[SUM_INNER:%.*]] = phi i64 [ [[SUM_INC:%.*]], [[FOR1_INC]] ], [ [[SUM_OUTER:%.*]], [[FOR1_HEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR2_SPLIT1:%.*]]
+; CHECK:       for2.preheader:
+; CHECK-NEXT:    br label [[FOR2:%.*]]
+; CHECK:       for2:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_OUTER]] = phi i64 [ [[SUM_INC_LCSSA:%.*]], [[FOR2_SPLIT]] ], [ 0, [[FOR2_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR1_HEADER_PREHEADER]]
+; CHECK:       for2.split1:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]]
+; CHECK-NEXT:    [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]]
+; CHECK-NEXT:    br label [[FOR1_INC]]
+; CHECK:       for2.split:
+; CHECK-NEXT:    [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXIT1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], 100
+; CHECK-NEXT:    br i1 [[EXIT1]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR2]]
+; CHECK:       for1.inc:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1
+; CHECK-NEXT:    [[EXIT2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT24]], 100
+; CHECK-NEXT:    br i1 [[EXIT2]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]]
+; CHECK:       for1.loopexit:
+; CHECK-NEXT:    [[SUM_INC_LCSSA2:%.*]] = phi i64 [ [[SUM_INC_LCSSA]], [[FOR2_SPLIT]] ]
+; CHECK-NEXT:    ret i64 [[SUM_INC_LCSSA2]]
+;
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %for1.inc, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for2, %for1.header
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
+  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+  %lv = load i64, i64* %arrayidx, align 4
+  %sum.inc = add i64 %sum.inner, %lv
+  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+  br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc:                                ; preds = %for2
+  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %exit2 = icmp eq i64 %indvars.iv.next24, 100
+  br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit:                                 ; preds = %for1.inc
+  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+  ret i64 %sum.inc.lcssa2
+}
+
+; In this test case, the inner reduction PHI %inner does not involve the outer
+; reduction PHI %sum.outer, do not interchange.
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            UnsupportedPHIOuter
+; REMARKS-NEXT: Function:        test2
+
+define i64 @test2([100 x [100 x i64]]* %Arr) {
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %for1.inc, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for2, %for1.header
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+  %inner = phi i64 [ %indvars.iv23, %for1.header ], [ %sum.inc, %for2 ]
+  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+  %lv = load i64, i64* %arrayidx, align 4
+  %sum.inc = add i64 %inner, %lv
+  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+  br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc:                                ; preds = %for2
+  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %exit2 = icmp eq i64 %indvars.iv.next24, 100
+  br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit:                                 ; preds = %for1.inc
+  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+  ret i64 %sum.inc.lcssa2
+}
+
+; Check that we do not interchange if there is an additional instruction
+; between the outer and inner reduction PHIs.
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            UnsupportedPHIOuter
+; REMARKS-NEXT: Function:        test3
+
+define i64 @test3([100 x [100 x i64]]* %Arr) {
+entry:
+  br label %for1.header
+
+for1.header:                                         ; preds = %for1.inc, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+  %so = add i64 %sum.outer, 10
+  br label %for2
+
+for2:                                        ; preds = %for2, %for1.header
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+  %sum.inner = phi i64 [ %so, %for1.header ], [ %sum.inc, %for2 ]
+  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+  %lv = load i64, i64* %arrayidx, align 4
+  %sum.inc = add i64 %sum.inner, %lv
+  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+  br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc:                                ; preds = %for2
+  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %exit2 = icmp eq i64 %indvars.iv.next24, 100
+  br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit:                                 ; preds = %for1.inc
+  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+  ret i64 %sum.inc.lcssa2
+}
author	Florian Hahn <florian.hahn@arm.com>	2018-11-08 20:44:19 +0000
committer	Florian Hahn <florian.hahn@arm.com>	2018-11-08 20:44:19 +0000
commit	a684a99441367b8e0dda222178156e827e93e3c2 (patch)
tree	fae758e619c9ce2a8cf2205873034dddd0f1f4f5 /llvm/test/Transforms
parent	8cca8bd4aa9a4ade721495f66f9d2bd2afefe452 (diff)
download	bcm5719-llvm-a684a99441367b8e0dda222178156e827e93e3c2.tar.gz bcm5719-llvm-a684a99441367b8e0dda222178156e827e93e3c2.zip