[LV] Avoid vectorizing unsafe dependencies in uniform address

Summary: Currently, when vectorizing stores to uniform addresses, the only case in which we prevent vectorization is when multiple stores to the same uniform address cause an unsafe dependency. This patch teaches LAA to avoid vectorizing loops that have an unsafe cross-iteration dependency between a load and a store to the same uniform address. Fixes PR39653. Reviewers: Ayal, efriedma Subscribers: rkruppe, llvm-commits Differential Revision: https://reviews.llvm.org/D54538 llvm-svn: 347220
author: Anna Thomas <anna@azul.com> 2018-11-19 15:39:59 +0000
committer: Anna Thomas <anna@azul.com> 2018-11-19 15:39:59 +0000
commit: 5e9215f02bc54f89148da065e40948fbd038a2c4 (patch)
tree: 1eaa4dd24264f27c4662669ecae046ebf5613f65 /llvm/test
parent: 8a063df19435af25eadd30734be1b76275cbb56b (diff)
download: bcm5719-llvm-5e9215f02bc54f89148da065e40948fbd038a2c4.tar.gz
bcm5719-llvm-5e9215f02bc54f89148da065e40948fbd038a2c4.zip
5 files changed, 50 insertions, 8 deletions
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
index 0d0fe65694c..cb1b7edb3d1 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/memcheck-wrapping-pointers.ll
@@ -39,7 +39,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ; CHECK-NEXT:      Group
 ; CHECK-NEXT:        (Low: %b High: ((4 * (1 umax %x)) + %b))
 ; CHECK-NEXT:          Member: {%b,+,4}<%for.body>
-; CHECK:         Multiple stores to invariant address were not found in loop.
+; CHECK:         Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:    SCEV assumptions:
 ; CHECK-NEXT:    {1,+,1}<%for.body> Added Flags: <nusw>
 ; CHECK-NEXT:    {0,+,1}<%for.body> Added Flags: <nusw>
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
index f24211d1e0d..611e957168f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check1.ll
@@ -14,14 +14,14 @@
 ; The LAA with the new PM is a loop pass so we go from inner to outer loops.
 
 ; OLDPM: for.cond1.preheader:
-; OLDPM:   Multiple stores to invariant address were not found in loop.
+; OLDPM:   Non vectorizable stores to invariant address were not found in loop.
 ; OLDPM: for.body3:
-; OLDPM:   Multiple stores to invariant address were found in loop.
+; OLDPM:   Non vectorizable stores to invariant address were found in loop.
 
 ; NEWPM: for.body3:
-; NEWPM:   Multiple stores to invariant address were found in loop.
+; NEWPM:   Non vectorizable stores to invariant address were found in loop.
 ; NEWPM: for.cond1.preheader:
-; NEWPM:   Multiple stores to invariant address were not found in loop.
+; NEWPM:   Non vectorizable stores to invariant address were not found in loop.
 
 define i32 @foo(i32* nocapture %var1, i32* nocapture readonly %var2, i32 %itr) #0 {
 entry:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
index 07bcdcc5c66..d21cc6926c3 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check2.ll
@@ -10,8 +10,8 @@
 ;    }
 ;  }
 
-; CHECK: Multiple stores to invariant address were not found in loop.
-; CHECK-NOT: Multiple stores to invariant address were found in loop.
+; CHECK: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NOT: Non vectorizable stores to invariant address were found in loop.
 
 
 define i32 @foo(i32* nocapture readonly %var1, i32* nocapture %var2, i32 %itr) #0 {
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
index 8d7452471f5..b25d79b3d03 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/store-to-invariant-check3.ll
@@ -10,7 +10,7 @@
 ;    }
 ;  }
 
-; CHECK: Multiple stores to invariant address were not found in loop.
+; CHECK: Non vectorizable stores to invariant address were not found in loop.
 
 define void @foo(i32* nocapture %var1, i32* nocapture %var2, i32 %itr) #0 {
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 4155352d529..cf1257b8013 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -551,3 +551,45 @@ for.inc8:                                         ; preds = %for.body3, %for.con
 for.end10:                                        ; preds = %for.inc8, %entry
   ret i32 undef
 }
+
+; cannot vectorize loop with unsafe dependency between uniform load (%tmp10) and store
+; (%tmp12) to the same address
+; PR39653
+; Note: %tmp10 could be replaced by phi(%arg4, %tmp12), a potentially vectorizable
+; 1st-order-recurrence
+define void @unsafe_dep_uniform_load_store(i32 %arg, i32 %arg1, i64 %arg2, i16* %arg3, i32 %arg4, i64 %arg5) {
+; CHECK-LABEL: unsafe_dep_uniform_load_store
+; CHECK-NOT: <4 x i32>
+bb:
+  %tmp = alloca i32
+  store i32 %arg4, i32* %tmp
+  %tmp6 = getelementptr inbounds i16, i16* %arg3, i64 %arg5
+  br label %bb7
+
+bb7:
+  %tmp8 = phi i64 [ 0, %bb ], [ %tmp24, %bb7 ]
+  %tmp9 = phi i32 [ %arg1, %bb ], [ %tmp23, %bb7 ]
+  %tmp10 = load i32, i32* %tmp
+  %tmp11 = mul nsw i32 %tmp9, %tmp10
+  %tmp12 = srem i32 %tmp11, 65536
+  %tmp13 = add nsw i32 %tmp12, %tmp9
+  %tmp14 = trunc i32 %tmp13 to i16
+  %tmp15 = trunc i64 %tmp8 to i32
+  %tmp16 = add i32 %arg, %tmp15
+  %tmp17 = zext i32 %tmp16 to i64
+  %tmp18 = getelementptr inbounds i16, i16* %tmp6, i64 %tmp17
+  store i16 %tmp14, i16* %tmp18, align 2
+  %tmp19 = add i32 %tmp13, %tmp9
+  %tmp20 = trunc i32 %tmp19 to i16
+  %tmp21 = and i16 %tmp20, 255
+  %tmp22 = getelementptr inbounds i16, i16* %arg3, i64 %tmp17
+  store i16 %tmp21, i16* %tmp22, align 2
+  %tmp23 = add nsw i32 %tmp9, 1
+  %tmp24 = add nuw nsw i64 %tmp8, 1
+  %tmp25 = icmp eq i64 %tmp24, %arg2
+  store i32 %tmp12, i32* %tmp
+  br i1 %tmp25, label %bb26, label %bb7
+
+bb26:
+  ret void
+}
author	Anna Thomas <anna@azul.com>	2018-11-19 15:39:59 +0000
committer	Anna Thomas <anna@azul.com>	2018-11-19 15:39:59 +0000
commit	5e9215f02bc54f89148da065e40948fbd038a2c4 (patch)
tree	1eaa4dd24264f27c4662669ecae046ebf5613f65 /llvm/test
parent	8a063df19435af25eadd30734be1b76275cbb56b (diff)
download	bcm5719-llvm-5e9215f02bc54f89148da065e40948fbd038a2c4.tar.gz bcm5719-llvm-5e9215f02bc54f89148da065e40948fbd038a2c4.zip