author | Philip Reames <listmail@philipreames.com> | 2019-03-14 17:20:59 +0000 |
---|---|---|
committer | Philip Reames <listmail@philipreames.com> | 2019-03-14 17:20:59 +0000 |
commit | 70d156991ca425b2c472935e144e98ce205c666c (patch) | |
tree | 9f22561d367508f82225bced0c46ae8a9a0f880e /llvm | |
parent | 6f8dddf169336449aac339e6e107d604e8adf6a4 (diff) | |
Allow code motion (and thus folding) for atomic (but unordered) memory operands
Building on the work done in D57601, now that we can distinguish between atomic and volatile memory accesses, go ahead and allow code motion of unordered atomics. As seen in the diffs, this enables much better folding of memory operations into the instructions that use them. (The folding itself is mostly done by the PeepholeOpt pass.)
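To make the semantic change concrete, here is a small standalone C++ model of the before/after predicate. The MemOp type below is a hypothetical stand-in for MachineMemOperand (the enum and method names echo LLVM's, but nothing here is LLVM's actual code); only the volatile/ordering logic is meant to match. The key point is that an unordered atomic load is atomic but carries no ordering constraint, so treating "atomic" as "ordered" was overly conservative.

```cpp
#include <cassert>

// Hypothetical stand-in for MachineMemOperand; illustrative only.
enum class AtomicOrdering {
  NotAtomic, Unordered, Monotonic, Acquire, Release,
  AcquireRelease, SequentiallyConsistent
};

struct MemOp {
  AtomicOrdering Ordering;
  bool Volatile;

  bool isAtomic() const { return Ordering != AtomicOrdering::NotAtomic; }
  bool isVolatile() const { return Volatile; }
  // Mirrors MachineMemOperand::isUnordered(): no constraint beyond normal
  // aliasing rules, i.e. non-volatile and at most "unordered" atomic.
  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !Volatile;
  }
};

// Before this patch: every atomic operand pinned the instruction in place.
bool orderedOld(const MemOp &MMO) { return MMO.isVolatile() || MMO.isAtomic(); }
// After this patch: only volatile or genuinely ordered operands pin it.
bool orderedNew(const MemOp &MMO) { return !MMO.isUnordered(); }

int main() {
  MemOp UnorderedLoad{AtomicOrdering::Unordered, /*Volatile=*/false};
  assert(orderedOld(UnorderedLoad));  // old: blocked code motion and folding
  assert(!orderedNew(UnorderedLoad)); // new: eligible for motion and folding

  MemOp AcquireLoad{AtomicOrdering::Acquire, /*Volatile=*/false};
  assert(orderedNew(AcquireLoad));    // ordered atomics stay pinned
}
```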
Note: I have not reviewed all callers of hasOrderedMemoryRef, since one of them, isSafeToMove, is very widely used. I'm relying on the documented semantics of each method to judge correctness.
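For context on that audit concern, callers in the isSafeToMove mold treat hasOrderedMemoryRef() as a hard gate before their alias-based legality checks. The sketch below shows that consumption pattern under the new predicate; InstrView, MemOpView, and isSafeToMoveLike are invented names for illustration, not the real MachineInstr API.

```cpp
#include <algorithm>
#include <vector>

// Invented stand-ins for MachineInstr / MachineMemOperand; illustrative only.
struct MemOpView {
  bool Unordered; // true for plain loads/stores and "unordered" atomics
  bool isUnordered() const { return Unordered; }
};

struct InstrView {
  std::vector<MemOpView> MemOperands;

  // Post-patch predicate: ordered iff some operand is not unordered.
  bool hasOrderedMemoryRef() const {
    return std::any_of(MemOperands.begin(), MemOperands.end(),
                       [](const MemOpView &MMO) { return !MMO.isUnordered(); });
  }

  // Shape of a typical caller: refuse motion outright for ordered refs, then
  // fall through to alias/store legality checks (elided here).
  bool isSafeToMoveLike(bool SawStore) const {
    if (hasOrderedMemoryRef())
      return false;
    return !SawStore; // placeholder for the remaining checks
  }
};
```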
Differential Revision: https://reviews.llvm.org/D59345
llvm-svn: 356170
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/CodeGen/MachineInstr.cpp | 4 |
-rw-r--r-- | llvm/test/CodeGen/X86/atomic-non-integer.ll | 9 |
-rw-r--r-- | llvm/test/CodeGen/X86/atomic-unordered.ll | 89 |
3 files changed, 31 insertions, 71 deletions
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 95f5eb91ee1..17bd0f38964 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1291,10 +1291,8 @@ bool MachineInstr::hasOrderedMemoryRef() const {
     return true;

   // Check if any of our memory operands are ordered.
-  // TODO: This should probably be be isUnordered (see D57601), but the callers
-  // need audited and test cases written to be sure.
   return llvm::any_of(memoperands(), [](const MachineMemOperand *MMO) {
-    return MMO->isVolatile() || MMO->isAtomic();
+    return !MMO->isUnordered();
   });
 }

diff --git a/llvm/test/CodeGen/X86/atomic-non-integer.ll b/llvm/test/CodeGen/X86/atomic-non-integer.ll
index 36a55ef2923..bdeeab37fd3 100644
--- a/llvm/test/CodeGen/X86/atomic-non-integer.ll
+++ b/llvm/test/CodeGen/X86/atomic-non-integer.ll
@@ -62,8 +62,7 @@ define half @load_half(half* %fptr) {
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: pushq %rax
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movzwl (%rdi), %eax
-; CHECK-NEXT: movzwl %ax, %edi
+; CHECK-NEXT: movzwl (%rdi), %edi
 ; CHECK-NEXT: callq __gnu_h2f_ieee
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: .cfi_def_cfa_offset 8
@@ -75,8 +74,7 @@ define half @load_half(half* %fptr) {
 define float @load_float(float* %fptr) {
 ; CHECK-LABEL: load_float:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movl (%rdi), %eax
-; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: movd (%rdi), %xmm0
 ; CHECK-NEXT: retq
   %v = load atomic float, float* %fptr unordered, align 4
   ret float %v
@@ -85,8 +83,7 @@ define float @load_float(float* %fptr) {
 define double @load_double(double* %fptr) {
 ; CHECK-LABEL: load_double:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movq (%rdi), %rax
-; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: movq (%rdi), %xmm0
 ; CHECK-NEXT: retq
   %v = load atomic double, double* %fptr unordered, align 8
   ret double %v
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index 8993f292fcd..e56ebe85fe3 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -437,7 +437,6 @@ define i64 @load_fold_add1(i64* %p) {
   ret i64 %ret
 }

-; Legal, as expected
 define i64 @load_fold_add2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_add2:
 ; CHECK-O0: # %bb.0:
@@ -447,15 +446,14 @@ define i64 @load_fold_add2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_add2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: addq %rsi, %rax
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: addq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = add i64 %v, %v2
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_add3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_add3:
 ; CHECK-O0: # %bb.0:
@@ -466,9 +464,8 @@ define i64 @load_fold_add3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_add3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rcx
 ; CHECK-O3-NEXT: movq (%rsi), %rax
-; CHECK-O3-NEXT: addq %rcx, %rax
+; CHECK-O3-NEXT: addq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -495,7 +492,6 @@ define i64 @load_fold_sub1(i64* %p) {
   ret i64 %ret
 }

-; Legal, as expected
 define i64 @load_fold_sub2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_sub2:
 ; CHECK-O0: # %bb.0:
@@ -514,7 +510,6 @@ define i64 @load_fold_sub2(i64* %p, i64 %v2) {
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_sub3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_sub3:
 ; CHECK-O0: # %bb.0:
@@ -526,8 +521,7 @@ define i64 @load_fold_sub3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_sub3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
-; CHECK-O3-NEXT: subq %rcx, %rax
+; CHECK-O3-NEXT: subq (%rsi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -553,7 +547,6 @@ define i64 @load_fold_mul1(i64* %p) {
   ret i64 %ret
 }

-; Legal, O0 is better than O3 codegen (TODO)
 define i64 @load_fold_mul2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_mul2:
 ; CHECK-O0: # %bb.0:
@@ -563,15 +556,14 @@ define i64 @load_fold_mul2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_mul2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: imulq %rsi, %rax
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: imulq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = mul i64 %v, %v2
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_mul3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_mul3:
 ; CHECK-O0: # %bb.0:
@@ -582,9 +574,8 @@ define i64 @load_fold_mul3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_mul3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rcx
 ; CHECK-O3-NEXT: movq (%rsi), %rax
-; CHECK-O3-NEXT: imulq %rcx, %rax
+; CHECK-O3-NEXT: imulq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -639,7 +630,6 @@ define i64 @load_fold_sdiv2(i64* %p, i64 %v2) {
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_sdiv3:
 ; CHECK-O0: # %bb.0:
@@ -651,9 +641,8 @@ define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_sdiv3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
 ; CHECK-O3-NEXT: cqto
-; CHECK-O3-NEXT: idivq %rcx
+; CHECK-O3-NEXT: idivq (%rsi)
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -685,7 +674,6 @@ define i64 @load_fold_udiv1(i64* %p) {
   ret i64 %ret
 }

-; Legal, as expected
 define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_udiv2:
 ; CHECK-O0: # %bb.0:
@@ -706,7 +694,6 @@ define i64 @load_fold_udiv2(i64* %p, i64 %v2) {
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_udiv3:
 ; CHECK-O0: # %bb.0:
@@ -719,9 +706,8 @@ define i64 @load_fold_udiv3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_udiv3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
 ; CHECK-O3-NEXT: xorl %edx, %edx
-; CHECK-O3-NEXT: divq %rcx
+; CHECK-O3-NEXT: divq (%rsi)
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -783,7 +769,6 @@ define i64 @load_fold_srem2(i64* %p, i64 %v2) {
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_srem3:
 ; CHECK-O0: # %bb.0:
@@ -796,9 +781,8 @@ define i64 @load_fold_srem3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_srem3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
 ; CHECK-O3-NEXT: cqto
-; CHECK-O3-NEXT: idivq %rcx
+; CHECK-O3-NEXT: idivq (%rsi)
 ; CHECK-O3-NEXT: movq %rdx, %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -859,7 +843,6 @@ define i64 @load_fold_urem2(i64* %p, i64 %v2) {
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_urem3:
 ; CHECK-O0: # %bb.0:
@@ -873,9 +856,8 @@ define i64 @load_fold_urem3(i64* %p1, i64* %p2) {
 ; CHECK-O3-LABEL: load_fold_urem3:
 ; CHECK-O3: # %bb.0:
 ; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
 ; CHECK-O3-NEXT: xorl %edx, %edx
-; CHECK-O3-NEXT: divq %rcx
+; CHECK-O3-NEXT: divq (%rsi)
 ; CHECK-O3-NEXT: movq %rdx, %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -1101,7 +1083,6 @@ define i64 @load_fold_and1(i64* %p) {
   ret i64 %ret
 }

-; Legal, as expected
 define i64 @load_fold_and2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_and2:
 ; CHECK-O0: # %bb.0:
@@ -1111,15 +1092,14 @@ define i64 @load_fold_and2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_and2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: andq %rsi, %rax
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: andq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = and i64 %v, %v2
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_and3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_and3:
 ; CHECK-O0: # %bb.0:
@@ -1130,9 +1110,8 @@ define i64 @load_fold_and3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_and3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rcx
 ; CHECK-O3-NEXT: movq (%rsi), %rax
-; CHECK-O3-NEXT: andq %rcx, %rax
+; CHECK-O3-NEXT: andq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1159,7 +1138,6 @@ define i64 @load_fold_or1(i64* %p) {
   ret i64 %ret
 }

-; Legal, as expected
 define i64 @load_fold_or2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_or2:
 ; CHECK-O0: # %bb.0:
@@ -1169,15 +1147,14 @@ define i64 @load_fold_or2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_or2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: orq %rsi, %rax
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: orq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = or i64 %v, %v2
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_or3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_or3:
 ; CHECK-O0: # %bb.0:
@@ -1188,9 +1165,8 @@ define i64 @load_fold_or3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_or3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rcx
 ; CHECK-O3-NEXT: movq (%rsi), %rax
-; CHECK-O3-NEXT: orq %rcx, %rax
+; CHECK-O3-NEXT: orq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1217,7 +1193,6 @@ define i64 @load_fold_xor1(i64* %p) {
   ret i64 %ret
 }

-; Legal, as expected
 define i64 @load_fold_xor2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_xor2:
 ; CHECK-O0: # %bb.0:
@@ -1227,15 +1202,14 @@ define i64 @load_fold_xor2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_xor2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: xorq %rsi, %rax
+; CHECK-O3-NEXT: movq %rsi, %rax
+; CHECK-O3-NEXT: xorq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p unordered, align 8
   %ret = xor i64 %v, %v2
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_xor3:
 ; CHECK-O0: # %bb.0:
@@ -1246,9 +1220,8 @@ define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_xor3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rcx
 ; CHECK-O3-NEXT: movq (%rsi), %rax
-; CHECK-O3-NEXT: xorq %rcx, %rax
+; CHECK-O3-NEXT: xorq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
   %v2 = load atomic i64, i64* %p2 unordered, align 8
@@ -1256,7 +1229,6 @@ define i64 @load_fold_xor3(i64* %p1, i64* %p2) {
   ret i64 %ret
 }

-; Legal to fold (TODO)
 define i1 @load_fold_icmp1(i64* %p) {
 ; CHECK-O0-LABEL: load_fold_icmp1:
 ; CHECK-O0: # %bb.0:
@@ -1268,8 +1240,7 @@ define i1 @load_fold_icmp1(i64* %p) {
 ;
 ; CHECK-O3-LABEL: load_fold_icmp1:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: cmpq $15, %rax
+; CHECK-O3-NEXT: cmpq $15, (%rdi)
 ; CHECK-O3-NEXT: sete %al
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p unordered, align 8
@@ -1277,7 +1248,6 @@ define i1 @load_fold_icmp1(i64* %p) {
   ret i1 %ret
 }

-; Legal to fold (TODO)
 define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
 ; CHECK-O0-LABEL: load_fold_icmp2:
 ; CHECK-O0: # %bb.0:
@@ -1289,8 +1259,7 @@ define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
 ;
 ; CHECK-O3-LABEL: load_fold_icmp2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: cmpq %rsi, %rax
+; CHECK-O3-NEXT: cmpq %rsi, (%rdi)
 ; CHECK-O3-NEXT: sete %al
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p unordered, align 8
@@ -1298,7 +1267,6 @@ define i1 @load_fold_icmp2(i64* %p, i64 %v2) {
   ret i1 %ret
 }

-; Legal to fold (TODO)
 define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
 ; CHECK-O0-LABEL: load_fold_icmp3:
 ; CHECK-O0: # %bb.0:
@@ -1311,9 +1279,8 @@ define i1 @load_fold_icmp3(i64* %p1, i64* %p2) {
 ;
 ; CHECK-O3-LABEL: load_fold_icmp3:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: movq (%rsi), %rcx
-; CHECK-O3-NEXT: cmpq %rcx, %rax
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: cmpq %rax, (%rdi)
 ; CHECK-O3-NEXT: sete %al
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p1 unordered, align 8
@@ -1441,9 +1408,8 @@ define void @rmw_fold_mul2(i64* %p, i64 %v) {
 ;
 ; CHECK-O3-LABEL: rmw_fold_mul2:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: imulq %rsi, %rax
-; CHECK-O3-NEXT: movq %rax, (%rdi)
+; CHECK-O3-NEXT: imulq (%rdi), %rsi
+; CHECK-O3-NEXT: movq %rsi, (%rdi)
 ; CHECK-O3-NEXT: retq
   %prev = load atomic i64, i64* %p unordered, align 8
   %val = mul i64 %prev, %v
@@ -2070,9 +2036,8 @@ define i64 @load_forwarding(i64* %p) {
 ;
 ; CHECK-O3-LABEL: load_forwarding:
 ; CHECK-O3: # %bb.0:
-; CHECK-O3-NEXT: movq (%rdi), %rcx
 ; CHECK-O3-NEXT: movq (%rdi), %rax
-; CHECK-O3-NEXT: orq %rcx, %rax
+; CHECK-O3-NEXT: orq (%rdi), %rax
 ; CHECK-O3-NEXT: retq
   %v = load atomic i64, i64* %p unordered, align 8
   %v2 = load atomic i64, i64* %p unordered, align 8