diff options
| author | Anna Thomas <anna@azul.com> | 2018-11-19 15:39:59 +0000 |
|---|---|---|
| committer | Anna Thomas <anna@azul.com> | 2018-11-19 15:39:59 +0000 |
| commit | 5e9215f02bc54f89148da065e40948fbd038a2c4 (patch) | |
| tree | 1eaa4dd24264f27c4662669ecae046ebf5613f65 /llvm/test/Transforms | |
| parent | 8a063df19435af25eadd30734be1b76275cbb56b (diff) | |
| download | bcm5719-llvm-5e9215f02bc54f89148da065e40948fbd038a2c4.tar.gz bcm5719-llvm-5e9215f02bc54f89148da065e40948fbd038a2c4.zip | |
[LV] Avoid vectorizing unsafe dependencies in uniform address
Summary:
Currently, when vectorizing stores to uniform addresses, the only
case in which we prevent vectorization is when there are multiple stores to the
same uniform address, causing an unsafe dependency.
This patch teaches LAA to avoid vectorizing loops that have an unsafe
cross-iteration dependency between a load and a store to the same uniform address.
Fixes PR39653.
Reviewers: Ayal, efriedma
Subscribers: rkruppe, llvm-commits
Differential Revision: https://reviews.llvm.org/D54538
llvm-svn: 347220
Diffstat (limited to 'llvm/test/Transforms')
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 4155352d529..cf1257b8013 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -551,3 +551,45 @@ for.inc8: ; preds = %for.body3, %for.con
 for.end10: ; preds = %for.inc8, %entry
   ret i32 undef
 }
+
+; cannot vectorize loop with unsafe dependency between uniform load (%tmp10) and store
+; (%tmp12) to the same address
+; PR39653
+; Note: %tmp10 could be replaced by phi(%arg4, %tmp12), a potentially vectorizable
+; 1st-order-recurrence
+define void @unsafe_dep_uniform_load_store(i32 %arg, i32 %arg1, i64 %arg2, i16* %arg3, i32 %arg4, i64 %arg5) {
+; CHECK-LABEL: unsafe_dep_uniform_load_store
+; CHECK-NOT: <4 x i32>
+bb:
+  %tmp = alloca i32
+  store i32 %arg4, i32* %tmp
+  %tmp6 = getelementptr inbounds i16, i16* %arg3, i64 %arg5
+  br label %bb7
+
+bb7:
+  %tmp8 = phi i64 [ 0, %bb ], [ %tmp24, %bb7 ]
+  %tmp9 = phi i32 [ %arg1, %bb ], [ %tmp23, %bb7 ]
+  %tmp10 = load i32, i32* %tmp
+  %tmp11 = mul nsw i32 %tmp9, %tmp10
+  %tmp12 = srem i32 %tmp11, 65536
+  %tmp13 = add nsw i32 %tmp12, %tmp9
+  %tmp14 = trunc i32 %tmp13 to i16
+  %tmp15 = trunc i64 %tmp8 to i32
+  %tmp16 = add i32 %arg, %tmp15
+  %tmp17 = zext i32 %tmp16 to i64
+  %tmp18 = getelementptr inbounds i16, i16* %tmp6, i64 %tmp17
+  store i16 %tmp14, i16* %tmp18, align 2
+  %tmp19 = add i32 %tmp13, %tmp9
+  %tmp20 = trunc i32 %tmp19 to i16
+  %tmp21 = and i16 %tmp20, 255
+  %tmp22 = getelementptr inbounds i16, i16* %arg3, i64 %tmp17
+  store i16 %tmp21, i16* %tmp22, align 2
+  %tmp23 = add nsw i32 %tmp9, 1
+  %tmp24 = add nuw nsw i64 %tmp8, 1
+  %tmp25 = icmp eq i64 %tmp24, %arg2
+  store i32 %tmp12, i32* %tmp
+  br i1 %tmp25, label %bb26, label %bb7
+
+bb26:
+  ret void
+}

