| author | Tim Shen <timshen91@gmail.com> | 2017-05-16 20:18:06 +0000 |
|---|---|---|
| committer | Tim Shen <timshen91@gmail.com> | 2017-05-16 20:18:06 +0000 |
| commit | 3bef27cc6f42c01f56cf0b6876b7cf0d3c10ba6d (patch) | |
| tree | 65638641b9e50fbb19fe06424c96fd1dbcfc72ec /llvm/test/CodeGen/PowerPC/atomics-regression.ll | |
| parent | dadc0f11ad99841f6b205bf798497ee32adf7ec6 (diff) | |
[PPC] Lower load acquire/seq_cst trailing fence to cmp + bne + isync.
Summary:
This fixes PR32392.
The lowering pipeline is:
llvm.ppc.cfence in IR -> PPC::CFENCE8 in isel -> the actual instruction
sequence in expandPostRAPseudo.
expandPostRAPseudo is chosen because earlier passes would otherwise
eliminate the expanded instructions: early CSE would remove cmpw 3, 3 as
redundant, and some branch pass(es) would remove the never-taken bne- 7, .+4.
Differential Revision: https://reviews.llvm.org/D32763
llvm-svn: 303205
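To make the pipeline concrete, here is a minimal sketch of what a post-RA expansion of a CFENCE8-style pseudo can look like. This is an illustration only, not the code from D32763: the free function `expandCFencePseudo` and the helper `emitNeverTakenBne` are hypothetical names, and the exact opcodes and operand encodings the real patch uses may differ.

```cpp
// Hypothetical sketch, NOT the actual D32763 implementation. It illustrates
// how an expandPostRAPseudo-time expansion can emit instructions that
// earlier passes would have deleted (a no-op cmpw and a never-taken bne-).
// Requires PPC target-internal headers for PPC::CMPW, PPC::CR7, PPC::ISYNC.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

// Hypothetical helper: emits "bne- cr7, .+4" (encoding elided in this sketch).
static void emitNeverTakenBne(MachineBasicBlock &MBB, MachineInstr &Before,
                              const DebugLoc &DL, const TargetInstrInfo &TII,
                              unsigned CRReg);

// Expands: CFENCE8 <reg>  ==>  cmpw cr7, reg, reg ; bne- cr7, .+4 ; isync
static bool expandCFencePseudo(MachineInstr &MI, const TargetInstrInfo &TII) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Reg = MI.getOperand(0).getReg(); // value defined by the atomic load

  // cmpw cr7, Reg, Reg: always compares "equal", but creates a data
  // dependency from the loaded value into CR7. Early CSE would fold this
  // away if it were emitted before register allocation.
  BuildMI(MBB, MI, DL, TII.get(PPC::CMPW), PPC::CR7).addReg(Reg).addReg(Reg);

  // bne- cr7, .+4: statically never taken, but with the compare above it
  // forms a control dependency that younger loads cannot pass. A pre-RA
  // branch pass would delete it as unreachable/never-taken.
  emitNeverTakenBne(MBB, MI, DL, TII, PPC::CR7);

  // isync: combined with the control dependency, this is the classic
  // PowerPC "ctrl + isync" acquire idiom.
  BuildMI(MBB, MI, DL, TII.get(PPC::ISYNC));

  MI.eraseFromParent(); // the pseudo is fully expanded
  return true;
}
```

The comments show why the expansion must happen after register allocation: both emitted instructions look dead or redundant to pre-RA optimizations, so they only survive if created afterwards.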
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/atomics-regression.ll')
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/atomics-regression.ll | 64 |
1 file changed, 56 insertions, 8 deletions
```diff
diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
index 9af82b62553..054d3a4146b 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -23,7 +23,9 @@ define i8 @test2(i8* %ptr) {
 ; PPC64LE-LABEL: test2:
 ; PPC64LE:       # BB#0:
 ; PPC64LE-NEXT:    lbz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i8, i8* %ptr acquire, align 1
   ret i8 %val
@@ -35,7 +37,9 @@ define i8 @test3(i8* %ptr) {
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    ori 2, 2, 0
 ; PPC64LE-NEXT:    lbz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i8, i8* %ptr seq_cst, align 1
   ret i8 %val
@@ -63,7 +67,9 @@ define i16 @test6(i16* %ptr) {
 ; PPC64LE-LABEL: test6:
 ; PPC64LE:       # BB#0:
 ; PPC64LE-NEXT:    lhz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i16, i16* %ptr acquire, align 2
   ret i16 %val
@@ -75,7 +81,9 @@ define i16 @test7(i16* %ptr) {
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    ori 2, 2, 0
 ; PPC64LE-NEXT:    lhz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i16, i16* %ptr seq_cst, align 2
   ret i16 %val
@@ -103,7 +111,9 @@ define i32 @test10(i32* %ptr) {
 ; PPC64LE-LABEL: test10:
 ; PPC64LE:       # BB#0:
 ; PPC64LE-NEXT:    lwz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i32, i32* %ptr acquire, align 4
   ret i32 %val
@@ -115,7 +125,9 @@ define i32 @test11(i32* %ptr) {
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    ori 2, 2, 0
 ; PPC64LE-NEXT:    lwz 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i32, i32* %ptr seq_cst, align 4
   ret i32 %val
@@ -143,7 +155,9 @@ define i64 @test14(i64* %ptr) {
 ; PPC64LE-LABEL: test14:
 ; PPC64LE:       # BB#0:
 ; PPC64LE-NEXT:    ld 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i64, i64* %ptr acquire, align 8
   ret i64 %val
@@ -155,7 +169,9 @@ define i64 @test15(i64* %ptr) {
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    ori 2, 2, 0
 ; PPC64LE-NEXT:    ld 3, 0(3)
-; PPC64LE-NEXT:    lwsync
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
 ; PPC64LE-NEXT:    blr
   %val = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %val
@@ -9544,3 +9560,35 @@ define i64 @test559(i64* %ptr, i64 %val) {
   %ret = atomicrmw umin i64* %ptr, i64 %val singlethread seq_cst
   ret i64 %ret
 }
+
+; The second load should never be scheduled before isync.
+define i32 @test_ordering0(i32* %ptr1, i32* %ptr2) {
+; PPC64LE-LABEL: test_ordering0:
+; PPC64LE:       # BB#0:
+; PPC64LE-NEXT:    lwz 4, 0(3)
+; PPC64LE-NEXT:    cmpw 7, 4, 4
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
+; PPC64LE-NEXT:    lwz 3, 0(3)
+; PPC64LE-NEXT:    add 3, 4, 3
+; PPC64LE-NEXT:    blr
+  %val1 = load atomic i32, i32* %ptr1 acquire, align 4
+  %val2 = load i32, i32* %ptr1
+  %add = add i32 %val1, %val2
+  ret i32 %add
+}
+
+; The second store should never be scheduled before isync.
+define i32 @test_ordering1(i32* %ptr1, i32 %val1, i32* %ptr2) {
+; PPC64LE-LABEL: test_ordering1:
+; PPC64LE:       # BB#0:
+; PPC64LE-NEXT:    lwz 3, 0(3)
+; PPC64LE-NEXT:    cmpw 7, 3, 3
+; PPC64LE-NEXT:    bne- 7, .+4
+; PPC64LE-NEXT:    isync
+; PPC64LE-NEXT:    stw 4, 0(5)
+; PPC64LE-NEXT:    blr
+  %val2 = load atomic i32, i32* %ptr1 acquire, align 4
+  store i32 %val1, i32* %ptr2
+  ret i32 %val2
+}
```
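At the source level, the two new ordering tests correspond to patterns like the C++ below. This analogue is illustrative only (it is not part of the test file, and the function names are made up); the comments summarize the PPC64LE lowering the tests now check.

```cpp
#include <atomic>

// Analogue of test_ordering0: an acquire load followed by a plain load.
// After this patch, PPC64LE lowers the acquire load as
//   lwz; cmpw 7, r, r; bne- 7, .+4; isync
// and the second lwz may not be scheduled before the isync.
int acquire_then_load(std::atomic<int> &flag, int &data) {
  int f = flag.load(std::memory_order_acquire);
  return f + data;  // this read must stay after the acquire load
}

// Analogue of test_ordering1: an acquire load followed by a plain store.
// The stw likewise may not move above the isync.
int acquire_then_store(std::atomic<int> &flag, int &out, int v) {
  int f = flag.load(std::memory_order_acquire);
  out = v;          // this write must stay after the acquire load
  return f;
}
```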

