summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopVersioningLICM
diff options
context:
space:
mode:
authorAshutosh Nema <ashu1212@gmail.com>2016-02-06 07:47:48 +0000
committerAshutosh Nema <ashu1212@gmail.com>2016-02-06 07:47:48 +0000
commitdf6763abe85cabbd1b217e6740c296443c2b436f (patch)
treee85f35a6a0db276223d9b08567cf68acca174647 /llvm/test/Transforms/LoopVersioningLICM
parent0572837eff1648aa238c469405978712ce243066 (diff)
downloadbcm5719-llvm-df6763abe85cabbd1b217e6740c296443c2b436f.tar.gz
bcm5719-llvm-df6763abe85cabbd1b217e6740c296443c2b436f.zip
New Loop Versioning LICM Pass
Summary: When alias analysis is uncertain about the aliasing between any two accesses, it returns MayAlias. This uncertainty restricts LICM from proceeding further. In cases where alias analysis is uncertain, we can use loop versioning as an alternative. Loop versioning creates a version of the loop with aggressive aliasing assumptions in addition to the original with conservative (default) aliasing assumptions. The version of the loop making aggressive aliasing assumptions has all of its memory accesses marked as no-alias. These two versions of the loop are preceded by a runtime memory check. This runtime check consists of bounds checks for all unique memory locations accessed in the loop, and it ensures the lack of memory aliasing. The result of the runtime check determines which of the loop versions is executed: if the runtime check detects any memory aliasing, the original loop is executed; otherwise, the version with aggressive aliasing assumptions is used. The pass is off by default and can be enabled with the command-line option -enable-loop-versioning-licm. Reviewers: hfinkel, anemet, chatur01, reames Subscribers: MatzeB, grosser, joker.eph, sanjoy, javed.absar, sbaranga, llvm-commits Differential Revision: http://reviews.llvm.org/D9151 llvm-svn: 259986
Diffstat (limited to 'llvm/test/Transforms/LoopVersioningLICM')
-rw-r--r--llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll66
-rw-r--r--llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll51
-rw-r--r--llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll44
3 files changed, 161 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
new file mode 100644
index 00000000000..89d882e864b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -O1 -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s
+;
+; Test to confirm that the loop is a candidate for LoopVersioningLICM.
+; It also confirms that the invariant is moved out of the loop.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT: Loop Versioning found to be beneficial
+;
+; CHECK: for.body3:
+; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
+; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ]
+; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !6, !noalias !6
+; CHECK-NEXT: %add8 = add nsw i32 %add86, %add
+; CHECK-NEXT: %inc = add nuw i32 %j.113, 1
+; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr
+; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop !7
+define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 { ; two nested loops; the inner loop writes var1[j] and updates var3[i]; %var2 is never used in the body
+entry:
+ %cmp14 = icmp eq i32 %itr, 0 ; with %itr == 0 both loops are skipped
+ br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc11
+ %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] ; j: starts at 0 and is carried over from one outer iteration to the next
+ %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ] ; i: outer counter, starts at 0
+ %cmp212 = icmp ult i32 %j.016, %itr ; inner loop is entered only if j < %itr (unsigned)
+ br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %add = add i32 %i.015, %itr ; i + %itr, computed once per outer iteration
+ %idxprom6 = zext i32 %i.015 to i64
+ %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6 ; &var3[i]; depends only on i, so it is fixed for the whole inner loop
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.lr.ph, %for.body3
+ %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ] ; j inside the inner loop
+ %idxprom = zext i32 %j.113 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom ; &var1[j]
+ store i32 %add, i32* %arrayidx, align 4 ; var1[j] = i + %itr
+ %0 = load i32, i32* %arrayidx7, align 4 ; var3[i] is reloaded right after the store through %var1
+ %add8 = add nsw i32 %0, %add
+ store i32 %add8, i32* %arrayidx7, align 4 ; var3[i] += i + %itr
+ %inc = add nuw i32 %j.113, 1
+ %cmp2 = icmp ult i32 %inc, %itr ; keep looping while j + 1 < %itr (unsigned)
+ br i1 %cmp2, label %for.body3, label %for.inc11.loopexit
+
+for.inc11.loopexit: ; preds = %for.body3
+ br label %for.inc11
+
+for.inc11: ; preds = %for.inc11.loopexit, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ] ; j after the inner loop: unchanged if it was skipped, %itr if it ran
+ %inc12 = add nuw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc12, %itr ; outer loop continues while i + 1 < %itr (unsigned)
+ br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit
+
+for.end13.loopexit: ; preds = %for.inc11
+ br label %for.end13
+
+for.end13: ; preds = %for.end13.loopexit, %entry
+ ret i32 0 ; the function always returns 0
+}
+
diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll
new file mode 100644
index 00000000000..62612c76781
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -O1 -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm -disable-loop-unrolling 2>&1 | FileCheck %s
+;
+; Test to confirm that the loop is a good candidate for LoopVersioningLICM.
+; It also confirms that the invariant is moved out of the loop.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3.us<header><latch><exiting>
+; CHECK-NEXT: Loop Versioning found to be beneficial
+;
+; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit5: ; preds = %for.body3.us
+; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ]
+; CHECK-NEXT: store float %add14.us.lcssa, float* %arrayidx.us, align 4, !alias.scope !7, !noalias !8
+; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us
+;
+define i32 @foo(float* nocapture %var2, float** nocapture readonly %var3, i32 %itr) #0 { ; two nested loops over float data; the inner loop stores through a pointer loaded from %var3 and updates one element of %var2
+entry:
+ %cmp38 = icmp sgt i32 %itr, 1 ; loops run only when %itr > 1 (signed)
+ br i1 %cmp38, label %for.body3.lr.ph.us, label %for.end19
+
+for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us
+ %0 = phi float [ %.pre, %for.body3.lr.ph.us ], [ %add14.us, %for.body3.us ] ; first the value preloaded from %arrayidx.us, afterwards the value last stored to it
+ %indvars.iv = phi i64 [ 1, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ] ; inner index, starts at 1
+ %1 = trunc i64 %indvars.iv to i32
+ %conv.us = sitofp i32 %1 to float ; inner index converted to float
+ %add.us = fadd float %conv.us, %0
+ %arrayidx7.us = getelementptr inbounds float, float* %3, i64 %indvars.iv ; element of the array pointed to by %3 (loaded in for.body3.lr.ph.us)
+ store float %add.us, float* %arrayidx7.us, align 4
+ %2 = load float, float* %arrayidx.us, align 4 ; reload through %arrayidx.us right after the store through %3
+ %add14.us = fadd float %2, %add.us
+ store float %add14.us, float* %arrayidx.us, align 4 ; %arrayidx.us is computed outside this block, so the address is the same on every inner iteration
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr ; leave the inner loop once the next index equals %itr
+ br i1 %exitcond, label %for.cond1.for.inc17_crit_edge.us, label %for.body3.us
+
+for.body3.lr.ph.us: ; preds = %entry, %for.cond1.for.inc17_crit_edge.us
+ %indvars.iv40 = phi i64 [ %indvars.iv.next41, %for.cond1.for.inc17_crit_edge.us ], [ 1, %entry ] ; outer index, starts at 1
+ %arrayidx.us = getelementptr inbounds float, float* %var2, i64 %indvars.iv40 ; &var2[outer index]
+ %arrayidx6.us = getelementptr inbounds float*, float** %var3, i64 %indvars.iv40
+ %3 = load float*, float** %arrayidx6.us, align 8 ; var3[outer index]: base pointer used by the inner-loop store
+ %.pre = load float, float* %arrayidx.us, align 4 ; initial var2[outer index], fed into phi %0
+ br label %for.body3.us
+
+for.cond1.for.inc17_crit_edge.us: ; preds = %for.body3.us
+ %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1
+ %lftr.wideiv42 = trunc i64 %indvars.iv.next41 to i32
+ %exitcond43 = icmp eq i32 %lftr.wideiv42, %itr ; leave the outer loop once the next outer index equals %itr
+ br i1 %exitcond43, label %for.end19, label %for.body3.lr.ph.us
+
+for.end19: ; preds = %for.cond1.for.inc17_crit_edge.us, %entry
+ ret i32 0 ; the function always returns 0
+}
diff --git a/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll
new file mode 100644
index 00000000000..3e625f4f71e
--- /dev/null
+++ b/llvm/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -O1 -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s
+;
+; Test to confirm loop is not a candidate for LoopVersioningLICM.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT: LAA: Runtime check not found !!
+; CHECK-NEXT: Loop instructions not suitable for LoopVersioningLICM
+
+define i32 @foo(i32* nocapture %var1, i32 %itr) #0 { ; two nested loops; every memory access goes through the single pointer argument %var1
+entry:
+ %cmp18 = icmp eq i32 %itr, 0 ; with %itr == 0 both loops are skipped
+ br i1 %cmp18, label %for.end8, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc6
+ %j.020 = phi i32 [ %j.1.lcssa, %for.inc6 ], [ 0, %entry ] ; j: starts at 0 and is carried over from one outer iteration to the next
+ %i.019 = phi i32 [ %inc7, %for.inc6 ], [ 0, %entry ] ; i: outer counter, starts at 0
+ %cmp216 = icmp ult i32 %j.020, %itr ; inner loop is entered only if j < %itr (unsigned)
+ br i1 %cmp216, label %for.body3.lr.ph, label %for.inc6
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %0 = zext i32 %j.020 to i64 ; 64-bit start value for the inner index
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body3.lr.ph
+ %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] ; inner index
+ %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv ; &var1[inner index]
+ %1 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %1, %itr
+ store i32 %add, i32* %arrayidx, align 4 ; var1[inner index] += %itr; load and store use the same address
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr ; leave the inner loop once the next index equals %itr
+ br i1 %exitcond, label %for.inc6, label %for.body3
+
+for.inc6: ; preds = %for.body3, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.020, %for.cond1.preheader ], [ %itr, %for.body3 ] ; j after the inner loop: unchanged if it was skipped, %itr if it ran
+ %inc7 = add nuw i32 %i.019, 1
+ %exitcond21 = icmp eq i32 %inc7, %itr ; leave the outer loop once i + 1 equals %itr
+ br i1 %exitcond21, label %for.end8, label %for.cond1.preheader
+
+for.end8: ; preds = %for.inc6, %entry
+ ret i32 0 ; the function always returns 0
+}
+
OpenPOWER on IntegriCloud