diff options
| author | Matthew Simpson <mssimpso@codeaurora.org> | 2016-05-16 15:08:20 +0000 |
|---|---|---|
| committer | Matthew Simpson <mssimpso@codeaurora.org> | 2016-05-16 15:08:20 +0000 |
| commit | e43198dc4b8bcce0518575d16bff52a6d38cea1f (patch) | |
| tree | 3482cecfd428dc00f9eb34a582d0536aab9c8a4e /llvm/test/Transforms | |
| parent | 3df287923275340cb16cf597b93353a4910e3331 (diff) | |
| download | bcm5719-llvm-e43198dc4b8bcce0518575d16bff52a6d38cea1f.tar.gz bcm5719-llvm-e43198dc4b8bcce0518575d16bff52a6d38cea1f.zip | |
[LV] Ensure safe VF for loops with interleaved accesses
The selection of the vectorization factor currently doesn't consider
interleaved accesses. The vectorization factor is based on the maximum safe
dependence distance computed by LAA. However, for loops with interleaved
groups, we should instead base the vectorization factor on the maximum safe
dependence distance divided by the maximum interleave factor of all the
interleaved groups. Interleaved accesses not in a group will be scalarized.
Differential Revision: http://reviews.llvm.org/D20241
llvm-svn: 269659
Diffstat (limited to 'llvm/test/Transforms')
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll new file mode 100644 index 00000000000..ad1dd0edc3b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -force-vector-interleave=1 -enable-conflict-detection=false -loop-vectorize -dce -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +%struct.pair = type { i32, i32 } + +; Check vectorization of interleaved access groups with positive dependence +; distances. In this test, the maximum safe dependence distance for +; vectorization is 16 bytes. Normally, this would lead to a maximum VF of 4. +; However, for interleaved groups, the effective VF is VF * IF, where IF is the +; interleave factor. Here, the maximum safe dependence distance is recomputed +; as 16 / IF bytes, resulting in VF=2. Since IF=2, we should generate <4 x i32> +; loads and stores instead of <8 x i32> accesses. +; +; Note: LAA's conflict detection optimization has to be disabled for this test +; to be vectorized. 
+ +; struct pair { +; int x; +; int y; +; }; +; +; void max_vf(struct pair *restrict p) { +; for (int i = 0; i < 1000; i++) { +; p[i + 2].x = p[i].x; +; p[i + 2].y = p[i].y; +; } +; } + +; CHECK-LABEL: @max_vf +; CHECK: load <4 x i32> +; CHECK: store <4 x i32> + +define void @max_vf(%struct.pair* noalias nocapture %p) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %0 = add nuw nsw i64 %i, 2 + %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0 + %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0 + %1 = load i32, i32* %p_i.x, align 4 + store i32 %1, i32* %p_i_plus_2.x, align 4 + %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1 + %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1 + %2 = load i32, i32* %p_i.y, align 4 + store i32 %2, i32* %p_i_plus_2.y, align 4 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp eq i64 %i.next, 1000 + br i1 %cond, label %for.exit, label %for.body + +for.exit: + ret void +} |

