[SLPVectorizer] Failure to beneficially vectorize 'copyable' elements in integer binary ops.

Patch tries to improve vectorization of the following code: void add1(int * __restrict dst, const int * __restrict src) { *dst++ = *src++; *dst++ = *src++ + 1; *dst++ = *src++ + 2; *dst++ = *src++ + 3; } Allows to vectorize even if the very first operation is not a binary add, but just a load. Fixed PR34619 and other issues related to previous commit. Reviewers: spatel, mzolotukhin, mkuper, hfinkel, RKSimon, filcab, ABataev Reviewed By: ABataev, RKSimon Subscribers: llvm-commits, RKSimon Differential Revision: https://reviews.llvm.org/D28907 llvm-svn: 317618
author: Dinar Temirbulatov <dtemirbulatov@gmail.com> 2017-11-07 21:25:34 +0000
committer: Dinar Temirbulatov <dtemirbulatov@gmail.com> 2017-11-07 21:25:34 +0000
commit: b9a2832874d01d6692117d9b78d1c8aa56dba063 (patch)
tree: 965364d571882a4b945e3eb3b1cdce81999982ee /llvm/test/Transforms/SLPVectorizer/SystemZ
parent: 40d6663367b7e7c0cc5c3686e18fe75facd30403 (diff)
download: bcm5719-llvm-b9a2832874d01d6692117d9b78d1c8aa56dba063.tar.gz
bcm5719-llvm-b9a2832874d01d6692117d9b78d1c8aa56dba063.zip
1 files changed, 52 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
new file mode 100644
index 00000000000..3855834d8a1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=systemz-unknown -mcpu=z13 -slp-vectorizer -S < %s | FileCheck %s
+
+@bar = external global [4 x [4 x i32]], align 4
+@dct_luma = external global [4 x [4 x i32]], align 4
+
+define void @foo() local_unnamed_addr {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ADD277:%.*]] = add nsw i32 undef, undef
+; CHECK-NEXT:    store i32 [[ADD277]], i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
+; CHECK-NEXT:    [[ARRAYIDX372:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
+; CHECK-NEXT:    [[ARRAYIDX372_1:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4
+; CHECK-NEXT:    [[ARRAYIDX372_2:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[ADD277]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 3
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> undef, [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i32> [[TMP7]], <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT:    [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>*
+; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %add277 = add nsw i32 undef, undef
+  store i32 %add277, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 1), align 4
+  %0 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 0), align 4
+  %sub355 = add nsw i32 undef, %0
+  %shr.i = ashr i32 %sub355, 6
+  %arrayidx372 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 0
+  store i32 %shr.i, i32* %arrayidx372, align 4
+  %sub355.1 = add nsw i32 undef, %add277
+  %shr.i.1 = ashr i32 %sub355.1, 6
+  %arrayidx372.1 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 1
+  store i32 %shr.i.1, i32* %arrayidx372.1, align 4
+  %1 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 2), align 4
+  %sub355.2 = add nsw i32 undef, %1
+  %shr.i.2 = ashr i32 %sub355.2, 6
+  %arrayidx372.2 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 2
+  store i32 %shr.i.2, i32* %arrayidx372.2, align 4
+  %2 = load i32, i32* getelementptr inbounds ([4 x [4 x i32]], [4 x [4 x i32]]* @bar, i64 0, i64 3, i64 3), align 4
+  %sub355.3 = add nsw i32 undef, %2
+  %shr.i.3 = ashr i32 %sub355.3, 6
+  %arrayidx372.3 = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
+  store i32 %shr.i.3, i32* %arrayidx372.3, align 4
+  unreachable
+}
author	Dinar Temirbulatov <dtemirbulatov@gmail.com>	2017-11-07 21:25:34 +0000
committer	Dinar Temirbulatov <dtemirbulatov@gmail.com>	2017-11-07 21:25:34 +0000
commit	b9a2832874d01d6692117d9b78d1c8aa56dba063 (patch)
tree	965364d571882a4b945e3eb3b1cdce81999982ee /llvm/test/Transforms/SLPVectorizer/SystemZ
parent	40d6663367b7e7c0cc5c3686e18fe75facd30403 (diff)
download	bcm5719-llvm-b9a2832874d01d6692117d9b78d1c8aa56dba063.tar.gz bcm5719-llvm-b9a2832874d01d6692117d9b78d1c8aa56dba063.zip