summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms
diff options
context:
space:
mode:
authorMatthew Simpson <mssimpso@codeaurora.org>2016-05-16 15:08:20 +0000
committerMatthew Simpson <mssimpso@codeaurora.org>2016-05-16 15:08:20 +0000
commite43198dc4b8bcce0518575d16bff52a6d38cea1f (patch)
tree3482cecfd428dc00f9eb34a582d0536aab9c8a4e /llvm/test/Transforms
parent3df287923275340cb16cf597b93353a4910e3331 (diff)
downloadbcm5719-llvm-e43198dc4b8bcce0518575d16bff52a6d38cea1f.tar.gz
bcm5719-llvm-e43198dc4b8bcce0518575d16bff52a6d38cea1f.zip
[LV] Ensure safe VF for loops with interleaved accesses
The selection of the vectorization factor currently doesn't consider interleaved accesses. The vectorization factor is based on the maximum safe dependence distance computed by LAA. However, for loops with interleaved groups, we should instead base the vectorization factor on the maximum safe dependence distance divided by the maximum interleave factor of all the interleaved groups. Interleaved accesses not in a group will be scalarized. Differential Revision: http://reviews.llvm.org/D20241 llvm-svn: 269659
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r--llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll56
1 files changed, 56 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
new file mode 100644
index 00000000000..ad1dd0edc3b
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -force-vector-interleave=1 -enable-conflict-detection=false -loop-vectorize -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.pair = type { i32, i32 }
+
+; Check vectorization of interleaved access groups with positive dependence
+; distances. In this test, the maximum safe dependence distance for
+; vectorization is 16 bytes. Normally, this would lead to a maximum VF of 4.
+; However, for interleaved groups, the effective VF is VF * IF, where IF is the
+; interleave factor. Here, the maximum safe dependence distance is recomputed
+; as 16 / IF bytes, resulting in VF=2. Since IF=2, we should generate <4 x i32>
+; loads and stores instead of <8 x i32> accesses.
+;
+; Note: LAA's conflict detection optimization has to be disabled for this test
+; to be vectorized.
+
+; struct pair {
+; int x;
+; int y;
+; };
+;
+; void max_vf(struct pair *restrict p) {
+; for (int i = 0; i < 1000; i++) {
+; p[i + 2].x = p[i].x;
+; p[i + 2].y = p[i].y;
+; }
+; }
+
+; CHECK-LABEL: @max_vf
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+
+define void @max_vf(%struct.pair* noalias nocapture %p) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %0 = add nuw nsw i64 %i, 2
+ %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0
+ %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0
+ %1 = load i32, i32* %p_i.x, align 4
+ store i32 %1, i32* %p_i_plus_2.x, align 4
+ %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1
+ %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1
+ %2 = load i32, i32* %p_i.y, align 4
+ store i32 %2, i32* %p_i_plus_2.y, align 4
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp eq i64 %i.next, 1000
+ br i1 %cond, label %for.exit, label %for.body
+
+for.exit:
+ ret void
+}
OpenPOWER on IntegriCloud