| author | Philip Reames <listmail@philipreames.com> | 2017-02-14 01:38:31 +0000 |
|---|---|---|
| committer | Philip Reames <listmail@philipreames.com> | 2017-02-14 01:38:31 +0000 |
| commit | b2bca7e309848db08237d6d4a5955a5d5d8b337e (patch) | |
| tree | 9eaafc7b9e2f02c954ba38d38ee9c1bdf317f783 /llvm/test | |
| parent | e2fa5492b20e06c1dcc356f6242c45e7705c1c48 (diff) | |
[LICM] Make store promotion work in the face of unordered atomics
Extend our store promotion code to deal with unordered atomic accesses. Ordered atomics continue to be unhandled.
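To make the transform concrete, here is a minimal before/after sketch of promotion with unordered atomics, modeled on the @test10 case added to scalar-promote.ll below (block and value names are illustrative, not the pass's actual output):

    ; Before: every iteration loads and stores @X.
    Loop:
      %x = load atomic i32, i32* @X unordered, align 4
      %x2 = add i32 %x, 1
      store atomic i32 %x2, i32* @X unordered, align 4
      ...
      br i1 %cond, label %Out, label %Loop

    ; After: the load is hoisted to the preheader, the loop works on a
    ; register, and one unordered store is sunk behind an LCSSA phi.
    Entry:
      %x.pre = load atomic i32, i32* @X unordered, align 4
      br label %Loop
    Loop:
      %x = phi i32 [ %x.pre, %Entry ], [ %x2, %Loop ]
      %x2 = add i32 %x, 1
      ...
      br i1 %cond, label %Out, label %Loop
    Out:
      %x2.lcssa = phi i32 [ %x2, %Loop ]
      store atomic i32 %x2.lcssa, i32* @X unordered, align 4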
Most of the change is straightforward; the only complicated bit is the reasoning around mixing atomic and non-atomic memory accesses. Rather than trying to reason about the complex semantics of these cases, I simply disallowed promotion when both atomic and non-atomic accesses to the location are present. This is conservatively correct.
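For example (a sketch mirroring the @test7 case added to atomics.ll below), a loop that hits the same location with both a plain store and an unordered atomic store is left alone entirely:

    loop:
      store i32 5, i32* %x                               ; plain, non-atomic store
      %vala = load atomic i32, i32* %y monotonic, align 4
      store atomic i32 %vala, i32* %x unordered, align 4 ; unordered atomic store to %x
      ...

Because %x sees both kinds of access, promotion bails and both stores stay in the loop.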
It seems really tempting to just promote all accesses to atomic, but the original accesses might have been conditional. Since we can't lower an arbitrary atomic type, it might not be safe to promote all accesses to atomic. Consider a loop like the following:
    while (b) {
      load i128 ...
      if (can lower i128 atomic)
        store atomic i128 ...
      else
        store i128
    }
It could be that there's no race on the location, and thus the code is perfectly well defined even if we can't lower an i128 atomically.
It's not clear we need to be this conservative - arguably, the program above is broken since it can't be lowered unless the branch is folded - but I didn't want to have to fix any fallout which might result.
Differential Revision: https://reviews.llvm.org/D15592
llvm-svn: 295015
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/Transforms/LICM/atomics.ll | 146 |
| -rw-r--r-- | llvm/test/Transforms/LICM/scalar-promote.ll | 27 |
2 files changed, 171 insertions, 2 deletions
diff --git a/llvm/test/Transforms/LICM/atomics.ll b/llvm/test/Transforms/LICM/atomics.ll
index 919d1bdd114..15c461aeca2 100644
--- a/llvm/test/Transforms/LICM/atomics.ll
+++ b/llvm/test/Transforms/LICM/atomics.ll
@@ -60,8 +60,7 @@ end:
 ; CHECK-NEXT: br label %loop
 }

-; Don't try to "sink" unordered stores yet; it is legal, but the machinery
-; isn't there.
+; We can sink an unordered store
 define i32 @test4(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
 entry:
   br label %loop
@@ -75,6 +74,149 @@ loop:
 end:
   ret i32 %vala
 ; CHECK-LABEL: define i32 @test4(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NOT: store
+; CHECK-LABEL: end:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %vala
+; CHECK: store atomic i32 %[[LCSSAPHI]], i32* %x unordered, align 4
+}
+
+; We currently don't handle ordered atomics.
+define i32 @test5(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x release, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test5(
 ; CHECK: load atomic i32, i32* %y monotonic
 ; CHECK-NEXT: store atomic
 }
+
+; We currently don't touch volatiles
+define i32 @test6(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store volatile i32 %vala, i32* %x, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test6(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store volatile
+}
+
+; We currently don't touch volatiles
+define i32 @test6b(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic volatile i32 %vala, i32* %x unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test6b(
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic volatile
+}
+
+; Mixing unordered atomics and normal loads/stores is
+; currently unimplemented
+define i32 @test7(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  store i32 5, i32* %x
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test7(
+; CHECK: store i32 5, i32* %x
+; CHECK-NEXT: load atomic i32, i32* %y
+; CHECK-NEXT: store atomic i32
+}
+
+; Three provably noalias locations - we can sink normal and unordered, but
+; not monotonic
+define i32 @test7b(i32* nocapture noalias %x, i32* nocapture %y, i32* noalias nocapture %z) nounwind uwtable ssp {
+entry:
+  br label %loop
+
+loop:
+  store i32 5, i32* %x
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %z unordered, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test7b(
+; CHECK: load atomic i32, i32* %y monotonic
+
+; CHECK-LABEL: end:
+; CHECK: store i32 5, i32* %x
+; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4
+}
+
+
+define i32 @test8(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x unordered, align 4
+  fence release
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test8(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic
+; CHECK-NEXT: fence
+}
+
+; Exact semantics of monotonic accesses are a bit vague in the C++ spec;
+; for the moment, be conservative and don't touch them.
+define i32 @test9(i32* nocapture noalias %x, i32* nocapture %y) {
+entry:
+  br label %loop
+
+loop:
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  store atomic i32 %vala, i32* %x monotonic, align 4
+  %exitcond = icmp ne i32 %vala, 0
+  br i1 %exitcond, label %end, label %loop
+
+end:
+  ret i32 %vala
+; CHECK-LABEL: define i32 @test9(
+; CHECK-LABEL: loop:
+; CHECK: load atomic i32, i32* %y monotonic
+; CHECK-NEXT: store atomic i32 %vala, i32* %x monotonic, align 4
+}
diff --git a/llvm/test/Transforms/LICM/scalar-promote.ll b/llvm/test/Transforms/LICM/scalar-promote.ll
index c88701154b8..89888546494 100644
--- a/llvm/test/Transforms/LICM/scalar-promote.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote.ll
@@ -378,6 +378,33 @@ exit:
   ret i32 %ret
 }

+define void @test10(i32 %i) {
+Entry:
+  br label %Loop
+; CHECK-LABEL: @test10(
+; CHECK: Entry:
+; CHECK-NEXT: load atomic i32, i32* @X unordered, align 4
+; CHECK-NEXT: br label %Loop
+
+
+Loop:  ; preds = %Loop, %0
+  %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]  ; <i32> [#uses=1]
+  %x = load atomic i32, i32* @X unordered, align 4
+  %x2 = add i32 %x, 1
+  store atomic i32 %x2, i32* @X unordered, align 4
+  %Next = add i32 %j, 1
+  %cond = icmp eq i32 %Next, 0
+  br i1 %cond, label %Out, label %Loop
+
+Out:
+  ret void
+; CHECK: Out:
+; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2
+; CHECK-NEXT: store atomic i32 %[[LCSSAPHI]], i32* @X unordered, align 4
+; CHECK-NEXT: ret void
+
+}
+
 !0 = !{!4, !4, i64 0}
 !1 = !{!"omnipotent char", !2}
 !2 = !{!"Simple C/C++ TBAA"}

