summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoopUnroll/PowerPC
diff options
context:
space:
mode:
authorEric Christopher <echristo@gmail.com>2019-04-17 04:52:47 +0000
committerEric Christopher <echristo@gmail.com>2019-04-17 04:52:47 +0000
commitcee313d288a4faf0355d76fb6e0e927e211d08a5 (patch)
treed386075318d761197779a96e5d8fc0dc7b06342b /llvm/test/Transforms/LoopUnroll/PowerPC
parentc3d6a929fdd92fd06d4304675ade8d7210ee711a (diff)
downloadbcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.tar.gz
bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.zip
Revert "Temporarily Revert "Add basic loop fusion pass.""
The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552
Diffstat (limited to 'llvm/test/Transforms/LoopUnroll/PowerPC')
-rw-r--r--llvm/test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll27
-rw-r--r--llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll34
-rw-r--r--llvm/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg3
-rw-r--r--llvm/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll50
-rw-r--r--llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll74
5 files changed, 188 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll
new file mode 100644
index 00000000000..bd5f68f9f20
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/a2-high-cost-trip-count-computation.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+;; Check that we do emit expensive instructions to compute trip
+;; counts when unrolling loops on the a2 (because we unroll a lot).
+
+define i32 @test(i64 %v12, i8* %array, i64* %loc) {
+; CHECK-LABEL: @test(
+; CHECK: udiv
+entry:
+ %step = load i64, i64* %loc, !range !0
+ br label %loop
+
+loop: ; preds = %entry, %loop
+ %k.015 = phi i64 [ %v15, %loop ], [ %v12, %entry ]
+ %v14 = getelementptr inbounds i8, i8* %array, i64 %k.015
+ store i8 0, i8* %v14
+ %v15 = add nuw nsw i64 %k.015, %step
+ %v16 = icmp slt i64 %v15, 8193
+ br i1 %v16, label %loop, label %loopexit
+
+loopexit: ; preds = %loop
+ ret i32 0
+}
+
+!0 = !{i64 1, i64 100}
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
new file mode 100644
index 00000000000..6b9d31e2d99
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
+
+; EPILOG-LABEL: @test
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil{{.*}}:
+
+; PROLOG-LABEL: @test
+; PROLOG: for.body.prol{{.*}}:
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg b/llvm/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
new file mode 100644
index 00000000000..5d33887ff0a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True
+
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
new file mode 100644
index 00000000000..c9677d83e37
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s
+define void @unroll_default() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; CHECK-LABEL: @unroll_default
+; CHECK: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
new file mode 100644
index 00000000000..27998230abe
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -loop-unroll | FileCheck %s
+; RUN: opt < %s -S -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -loop-unroll | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind
+define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
+entry:
+ %cmp10 = icmp sgt i32 %k, 0
+ br i1 %cmp10, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %wide.trip.count = zext i32 %k to i64
+ %min.iters.check = icmp ult i32 %k, 16
+ br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
+
+vector.ph: ; preds = %for.body.lr.ph
+ %n.vec = and i64 %wide.trip.count, 4294967280
+ %broadcast.splatinsert = insertelement <16 x i32> undef, i32 %x, i32 0
+ %broadcast.splat = shufflevector <16 x i32> %broadcast.splatinsert, <16 x i32> undef, <16 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %vec.ind12 = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, %vector.ph ], [ %vec.ind.next13, %vector.body ]
+ %0 = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %vec.ind12
+ %1 = and <16 x i32> %0, %broadcast.splat
+ %2 = icmp eq <16 x i32> %1, zeroinitializer
+ %3 = select <16 x i1> %2, <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
+ %4 = getelementptr inbounds i8, i8* %s, i64 %index
+ %5 = bitcast i8* %4 to <16 x i8>*
+ store <16 x i8> %3, <16 x i8>* %5, align 1
+ %index.next = add i64 %index, 16
+ %vec.ind.next13 = add <16 x i32> %vec.ind12, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %6 = icmp eq i64 %index.next, %n.vec
+ br i1 %6, label %middle.block, label %vector.body
+
+middle.block: ; preds = %vector.body
+ %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
+ br i1 %cmp.n, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %middle.block, %for.body.lr.ph
+ %indvars.iv.ph = phi i64 [ 0, %for.body.lr.ph ], [ %n.vec, %middle.block ]
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
+ %7 = trunc i64 %indvars.iv to i32
+ %shl = shl i32 1, %7
+ %and = and i32 %shl, %x
+ %tobool = icmp eq i32 %and, 0
+ %conv = select i1 %tobool, i8 48, i8 49
+ %arrayidx = getelementptr inbounds i8, i8* %s, i64 %indvars.iv
+ store i8 %conv, i8* %arrayidx, align 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %middle.block, %entry
+ %idxprom1 = sext i32 %k to i64
+ %arrayidx2 = getelementptr inbounds i8, i8* %s, i64 %idxprom1
+ store i8 0, i8* %arrayidx2, align 1
+ ret i8* %s
+}
+
+
+; CHECK-LABEL: vector.body
+; CHECK: shl
+; CHECK-NEXT: and
+; CHECK: shl
+; CHECK-NEXT: and
+; CHECK: label %vector.body
+
OpenPOWER on IntegriCloud