author | Matthew Simpson <mssimpso@codeaurora.org> | 2016-01-15 18:51:51 +0000 |
---|---|---|
committer | Matthew Simpson <mssimpso@codeaurora.org> | 2016-01-15 18:51:51 +0000 |
commit | 57fe1b10db94080f7acd932578b103ac793eb28a (patch) | |
tree | 947a6fe51c42bd93a244ddbed336aa42dc486315 /llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll | |
parent | b4adf55e0f050a494cc0c9a98b0bd46e8e30070a (diff) | |
Reapply r257800 with fix
The fix uniques the bundle of getelementptr indices we are about to vectorize
since it's possible for the same index to be used by multiple instructions.
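As a minimal sketch of the idea (not the actual SLPVectorizer patch; the helper name and surrounding code are hypothetical), de-duplicating the index operands of a bundle of single-index getelementptrs before vectorizing them might look like this, using LLVM's SmallSetVector to keep insertion order while rejecting duplicates:

  // Hypothetical sketch only: keep the first occurrence of each index value
  // in a bundle of single-index getelementptrs so the same index is not
  // handed to the vectorizer twice.
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SetVector.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static SmallVector<Value *, 8>
  uniqueGEPIndices(ArrayRef<GetElementPtrInst *> Bundle) {
    SmallSetVector<Value *, 8> Seen;
    for (GetElementPtrInst *GEP : Bundle)
      if (GEP->getNumIndices() == 1)      // only single-index GEPs are seeded
        Seen.insert(GEP->getOperand(1));  // repeated index values are dropped here
    return SmallVector<Value *, 8>(Seen.begin(), Seen.end());
  }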
The original commit message is below.
[SLP] Vectorize the index computations of getelementptr instructions.
This patch seeds the SLP vectorizer with getelementptr indices. The primary
motivation in doing so is to vectorize gather-like idioms beginning with
consecutive loads (e.g., g[a[0] - b[0]] + g[a[1] - b[1]] + ...). While these
cases could be vectorized with a top-down phase, seeding the existing bottom-up
phase with the index computations avoids the complexity, compile-time, and
phase ordering issues associated with a full top-down pass. Only bundles of
single-index getelementptrs with non-constant differences are considered for
vectorization.
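For reference, a gather-like loop of the kind described above could be written in C as follows; this is an illustrative example in the spirit of the g[a[0] - b[0]] + g[a[1] - b[1]] + ... pattern, not a test from this commit:

  // The index computations a[i+k] - b[i+k] come from consecutive loads of a
  // and b, so they can seed a vector subtract; the loads of g remain a gather.
  // Assumes n is a multiple of 4 for brevity.
  int gather_reduce(const int *g, const int *a, const int *b, int n) {
    int sum = 0;
    for (int i = 0; i < n; i += 4) {
      sum += g[a[i + 0] - b[i + 0]];
      sum += g[a[i + 1] - b[i + 1]];
      sum += g[a[i + 2] - b[i + 2]];
      sum += g[a[i + 3] - b[i + 3]];
    }
    return sum;
  }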
llvm-svn: 257918
Diffstat (limited to 'llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll')
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll | 111 |
1 file changed, 111 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
new file mode 100644
index 00000000000..e9b71963530
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -0,0 +1,111 @@
+; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine < %s | FileCheck %s
+
+target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; These tests check that we remove from consideration pairs of seed
+; getelementptrs when they are known to have a constant difference. Such pairs
+; are likely not good candidates for vectorization since one can be computed
+; from the other. We use an unprofitable threshold to force vectorization.
+;
+; int getelementptr(int *g, int n, int w, int x, int y, int z) {
+;   int sum = 0;
+;   for (int i = 0; i < n ; ++i) {
+;     sum += g[2*i + w]; sum += g[2*i + x];
+;     sum += g[2*i + y]; sum += g[2*i + z];
+;   }
+;   return sum;
+; }
+;
+
+; CHECK-LABEL: @getelementptr_4x32
+;
+; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <4 x i32>
+; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
+; CHECK: sext i32 [[X]] to i64
+;
+define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
+entry:
+  %cmp31 = icmp sgt i32 %n, 0
+  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
+  ret i32 %sum.0.lcssa
+
+for.body:
+  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
+  %t4 = shl nsw i32 %indvars.iv, 1
+  %t5 = add nsw i32 %t4, 0
+  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
+  %t6 = load i32, i32* %arrayidx, align 4
+  %add1 = add nsw i32 %t6, %sum.032
+  %t7 = add nsw i32 %t4, %x
+  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
+  %t8 = load i32, i32* %arrayidx5, align 4
+  %add6 = add nsw i32 %add1, %t8
+  %t9 = add nsw i32 %t4, %y
+  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
+  %t10 = load i32, i32* %arrayidx10, align 4
+  %add11 = add nsw i32 %add6, %t10
+  %t11 = add nsw i32 %t4, %z
+  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
+  %t12 = load i32, i32* %arrayidx15, align 4
+  %add16 = add nsw i32 %add11, %t12
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next , %n
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; CHECK-LABEL: @getelementptr_2x32
+;
+; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <2 x i32>
+; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
+; CHECK: sext i32 [[X]] to i64
+;
+define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
+entry:
+  %cmp31 = icmp sgt i32 %n, 0
+  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+  br label %for.body
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
+  ret i32 %sum.0.lcssa
+
+for.body:
+  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
+  %t4 = shl nsw i32 %indvars.iv, 1
+  %t5 = add nsw i32 %t4, 0
+  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
+  %t6 = load i32, i32* %arrayidx, align 4
+  %add1 = add nsw i32 %t6, %sum.032
+  %t7 = add nsw i32 %t4, 1
+  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
+  %t8 = load i32, i32* %arrayidx5, align 4
+  %add6 = add nsw i32 %add1, %t8
+  %t9 = add nsw i32 %t4, %y
+  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
+  %t10 = load i32, i32* %arrayidx10, align 4
+  %add11 = add nsw i32 %add6, %t10
+  %t11 = add nsw i32 %t4, %z
+  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
+  %t12 = load i32, i32* %arrayidx15, align 4
+  %add16 = add nsw i32 %add11, %t12
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next , %n
+  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}