From e43198dc4b8bcce0518575d16bff52a6d38cea1f Mon Sep 17 00:00:00 2001 From: Matthew Simpson Date: Mon, 16 May 2016 15:08:20 +0000 Subject: [LV] Ensure safe VF for loops with interleaved accesses The selection of the vectorization factor currently doesn't consider interleaved accesses. The vectorization factor is based on the maximum safe dependence distance computed by LAA. However, for loops with interleaved groups, we should instead base the vectorization factor on the maximum safe dependence distance divided by the maximum interleave factor of all the interleaved groups. Interleaved accesses not in a group will be scalarized. Differential Revision: http://reviews.llvm.org/D20241 llvm-svn: 269659 --- .../AArch64/max-vf-for-interleaved.ll | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll (limited to 'llvm/test/Transforms') diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll new file mode 100644 index 00000000000..ad1dd0edc3b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -force-vector-interleave=1 -enable-conflict-detection=false -loop-vectorize -dce -instcombine -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +%struct.pair = type { i32, i32 } + +; Check vectorization of interleaved access groups with positive dependence +; distances. In this test, the maximum safe dependence distance for +; vectorization is 16 bytes. Normally, this would lead to a maximum VF of 4. +; However, for interleaved groups, the effective VF is VF * IF, where IF is the +; interleave factor. Here, the maximum safe dependence distance is recomputed +; as 16 / IF bytes, resulting in VF=2. 
Since IF=2, we should generate <4 x i32> +; loads and stores instead of <8 x i32> accesses. +; +; Note: LAA's conflict detection optimization has to be disabled for this test +; to be vectorized. + +; struct pair { +; int x; +; int y; +; }; +; +; void max_vf(struct pair *restrict p) { +; for (int i = 0; i < 1000; i++) { +; p[i + 2].x = p[i].x; +; p[i + 2].y = p[i].y; +; } +; } + +; CHECK-LABEL: @max_vf +; CHECK: load <4 x i32> +; CHECK: store <4 x i32> + +define void @max_vf(%struct.pair* noalias nocapture %p) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] + %0 = add nuw nsw i64 %i, 2 + %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0 + %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0 + %1 = load i32, i32* %p_i.x, align 4 + store i32 %1, i32* %p_i_plus_2.x, align 4 + %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1 + %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1 + %2 = load i32, i32* %p_i.y, align 4 + store i32 %2, i32* %p_i_plus_2.y, align 4 + %i.next = add nuw nsw i64 %i, 1 + %cond = icmp eq i64 %i.next, 1000 + br i1 %cond, label %for.exit, label %for.body + +for.exit: + ret void +} -- cgit v1.2.3