summary refs log tree commit diff stats
path: root/llvm/test/Transforms/SLPVectorizer/SystemZ
diff options
context:
space:
mode:
author Eric Christopher <echristo@gmail.com> 2019-04-17 04:52:47 +0000
committer Eric Christopher <echristo@gmail.com> 2019-04-17 04:52:47 +0000
commit cee313d288a4faf0355d76fb6e0e927e211d08a5 (patch)
tree d386075318d761197779a96e5d8fc0dc7b06342b /llvm/test/Transforms/SLPVectorizer/SystemZ
parent c3d6a929fdd92fd06d4304675ade8d7210ee711a (diff)
download bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.tar.gz
bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.zip
Revert "Temporarily Revert "Add basic loop fusion pass.""
The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/SystemZ')
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll | 36
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/SystemZ/lit.local.cfg | 3
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll | 52
3 files changed, 91 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll
new file mode 100644
index 00000000000..1a32f659066
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/SLP-cmp-cost-query.ll
@@ -0,0 +1,36 @@
+; REQUIRES: asserts
+; RUN: opt -mtriple=systemz-unknown -mcpu=z13 -slp-vectorizer -debug-only=SLP \
+; RUN: -S -disable-output < %s 2>&1 | FileCheck %s
+;
+; Check that SLP vectorizer gets the right cost difference for a compare
+; node.
+
+; Function Attrs: norecurse nounwind readonly
+define void @fun(i8* nocapture, i32 zeroext) local_unnamed_addr #0 { ; unnamed args: %0 is the i8* pointer, %1 is the i32 value
+.lr.ph.preheader:
+ br label %.lr.ph
+
+.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
+ %2 = phi i32 [ %., %.lr.ph ], [ undef, %.lr.ph.preheader ] ; undef on entry, %. from the previous iteration
+ %3 = phi i32 [ %.9, %.lr.ph ], [ undef, %.lr.ph.preheader ] ; undef on entry, %.9 from the previous iteration
+ %4 = icmp ult i32 %2, %1 ; unsigned %2 < %1; this is the instruction quoted by the expected debug line at the end of the function
+ %5 = select i1 %4, i32 0, i32 %1 ; 0 when %2 < %1, otherwise %1
+ %. = sub i32 %2, %5 ; value fed back into the %2 phi
+ %6 = icmp ult i32 %3, %1 ; same compare/select/sub sequence, applied to %3
+ %7 = select i1 %6, i32 0, i32 %1
+ %.9 = sub i32 %3, %7 ; value fed back into the %3 phi
+ %8 = zext i32 %. to i64 ; widen %. for use as a GEP index
+ %9 = getelementptr inbounds i8, i8* %0, i64 %8
+ %10 = load i8, i8* %9, align 1 ; byte at %0 + %.
+ %11 = zext i32 %.9 to i64 ; widen %.9 for use as a GEP index
+ %12 = getelementptr inbounds i8, i8* %0, i64 %11
+ %13 = load i8, i8* %12, align 1 ; byte at %0 + %.9
+ %14 = icmp eq i8 %10, %13
+ br i1 %14, label %.lr.ph, label %._crit_edge ; keep looping while the two loaded bytes are equal
+
+._crit_edge: ; preds = %.lr.ph
+ ret void
+
+; CHECK: SLP: Adding cost -1 for bundle that starts with %4 = icmp ult i32 %2, %1.
+}
+
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/lit.local.cfg b/llvm/test/Transforms/SLPVectorizer/SystemZ/lit.local.cfg
new file mode 100644
index 00000000000..5c02dd3614a
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'SystemZ' in config.root.targets:  # 'SystemZ' is absent from the root config's target list
+ config.unsupported = True  # flag this configuration as unsupported
+
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
new file mode 100644
index 00000000000..3855834d8a1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=systemz-unknown -mcpu=z13 -slp-vectorizer -S < %s | FileCheck %s
+
+@bar = external global [4 x [4 x i32]], align 4
+@dct_luma = external global [4 x [4 x i32]], align 4
+
+define void @foo() local_unnamed_addr { ; four scalar add / ashr-by-6 / store sequences writing @dct_luma[3][0..3]
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ADD277:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT: store i32 [[ADD277]], i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
+; CHECK-NEXT: [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
+; CHECK-NEXT: [[ARRAYIDX372_1:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4
+; CHECK-NEXT: [[ARRAYIDX372_2:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[ADD277]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 2
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 3
+; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> undef, [[TMP6]]
+; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i32> [[TMP7]], <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT: [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT: unreachable
+;
+entry:
+ %add277 = add nsw i32 undef, undef
+ store i32 %add277, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4 ; write %add277 to @bar[3][1]
+ %0 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4 ; read @bar[3][0]
+ %sub355 = add nsw i32 undef, %0
+ %shr.i = ashr i32 %sub355, 6
+ %arrayidx372 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
+ store i32 %shr.i, i32* %arrayidx372, align 4 ; write @dct_luma[3][0]
+ %sub355.1 = add nsw i32 undef, %add277 ; element 1 uses %add277 directly instead of a load from @bar
+ %shr.i.1 = ashr i32 %sub355.1, 6
+ %arrayidx372.1 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
+ store i32 %shr.i.1, i32* %arrayidx372.1, align 4 ; write @dct_luma[3][1]
+ %1 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4 ; read @bar[3][2]
+ %sub355.2 = add nsw i32 undef, %1
+ %shr.i.2 = ashr i32 %sub355.2, 6
+ %arrayidx372.2 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
+ store i32 %shr.i.2, i32* %arrayidx372.2, align 4 ; write @dct_luma[3][2]
+ %2 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4 ; read @bar[3][3]
+ %sub355.3 = add nsw i32 undef, %2
+ %shr.i.3 = ashr i32 %sub355.3, 6
+ %arrayidx372.3 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
+ store i32 %shr.i.3, i32* %arrayidx372.3, align 4 ; write @dct_luma[3][3]
+ unreachable
+}
OpenPOWER on IntegriCloud