-rw-r--r-- | llvm/test/CodeGen/X86/atomic_mi.ll | 242
1 file changed, 212 insertions, 30 deletions
diff --git a/llvm/test/CodeGen/X86/atomic_mi.ll b/llvm/test/CodeGen/X86/atomic_mi.ll
index bf430eaea28..4c0439e8231 100644
--- a/llvm/test/CodeGen/X86/atomic_mi.ll
+++ b/llvm/test/CodeGen/X86/atomic_mi.ll
@@ -457,6 +457,188 @@ define void @add_32r_seq_cst(i32* %p, i32 %v) {
 ret void
 }
 
+; ----- SUB -----
+
+define void @sub_8r(i8* %p, i8 %v) {
+; X64-LABEL: sub_8r:
+; X64: # %bb.0:
+; X64-NEXT: movb (%rdi), %al
+; X64-NEXT: subb %sil, %al
+; X64-NEXT: movb %al, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_8r:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movb (%eax), %cl
+; X32-NEXT: subb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movb %cl, (%eax)
+; X32-NEXT: retl
+ %1 = load atomic i8, i8* %p seq_cst, align 1
+ %2 = sub i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @sub_16r(i16* %p, i16 %v) {
+; Currently the transformation is not done on 16-bit accesses, as the backend
+; treats 16-bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: sub_16r:
+; X64: # %bb.0:
+; X64-NEXT: movzwl (%rdi), %eax
+; X64-NEXT: subw %si, %ax
+; X64-NEXT: movw %ax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_16r:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movzwl (%eax), %ecx
+; X32-NEXT: subw {{[0-9]+}}(%esp), %cx
+; X32-NEXT: movw %cx, (%eax)
+; X32-NEXT: retl
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = sub i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @sub_32r(i32* %p, i32 %v) {
+; X64-LABEL: sub_32r:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: movl %eax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_32r:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl %ecx, (%eax)
+; X32-NEXT: retl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = sub i32 %1, %v
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret void
+}
+
+; The following is a corner case where the load is subtracted from itself. The
+; pattern matching should not fold this. We only test with 32-bit sub, but the
+; same applies to other sizes and operations.
+define void @sub_32r_self(i32* %p) {
+; X64-LABEL: sub_32r_self:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl $0, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_32r_self:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl $0, (%eax)
+; X32-NEXT: retl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = sub i32 %1, %1
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret void
+}
+
+; The following is a corner case where the load's result is returned. The
+; optimizer isn't allowed to duplicate the load because it's atomic.
+define i32 @sub_32r_ret_load(i32* %p, i32 %v) {
+; X64-LABEL: sub_32r_ret_load:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl %eax, %ecx
+; X64-NEXT: subl %esi, %ecx
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_32r_ret_load:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%ecx), %eax
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl %edx, (%ecx)
+; X32-NEXT: retl
+; More code here; we just don't want it to load from P.
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = sub i32 %1, %v
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret i32 %1
+}
+
+define void @sub_64r(i64* %p, i64 %v) {
+; X64-LABEL: sub_64r:
+; X64: # %bb.0:
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: subq %rsi, %rax
+; X64-NEXT: movq %rax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_64r:
+; X32: # %bb.0:
+; X32-NEXT: pushl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: .cfi_offset %esi, -12
+; X32-NEXT: .cfi_offset %ebx, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: xorl %ecx, %ecx
+; X32-NEXT: xorl %ebx, %ebx
+; X32-NEXT: lock cmpxchg8b (%esi)
+; X32-NEXT: movl %edx, %ecx
+; X32-NEXT: movl %eax, %ebx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl (%esi), %eax
+; X32-NEXT: movl 4(%esi), %edx
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB23_1: # %atomicrmw.start
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: lock cmpxchg8b (%esi)
+; X32-NEXT: jne .LBB23_1
+; X32-NEXT: # %bb.2: # %atomicrmw.end
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+; We do not check X86-32 as it cannot do 'subq'.
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = sub i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @sub_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: sub_32r_seq_cst:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: subl %esi, %eax
+; X64-NEXT: xchgl %eax, (%rdi)
+; X64-NEXT: retq
+;
+; X32-LABEL: sub_32r_seq_cst:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: xchgl %ecx, (%eax)
+; X32-NEXT: retl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = sub i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
 ; ----- AND -----
 
 define void @and_8i(i8* %p) {
@@ -593,11 +775,11 @@ define void @and_64i(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB24_1: # %atomicrmw.start
+; X32-NEXT: .LBB31_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: xorl %ecx, %ecx
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB24_1
+; X32-NEXT: jne .LBB31_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -638,10 +820,10 @@ define void @and_64r(i64* %p, i64 %v) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB25_1: # %atomicrmw.start
+; X32-NEXT: .LBB32_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB25_1
+; X32-NEXT: jne .LBB32_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -830,10 +1012,10 @@ define void @or_64i(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB34_1: # %atomicrmw.start
+; X32-NEXT: .LBB41_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB34_1
+; X32-NEXT: jne .LBB41_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -874,10 +1056,10 @@ define void @or_64r(i64* %p, i64 %v) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB35_1: # %atomicrmw.start
+; X32-NEXT: .LBB42_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB35_1
+; X32-NEXT: jne .LBB42_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1066,10 +1248,10 @@ define void @xor_64i(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB44_1: # %atomicrmw.start
+; X32-NEXT: .LBB51_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB44_1
+; X32-NEXT: jne .LBB51_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1110,10 +1292,10 @@ define void @xor_64r(i64* %p, i64 %v) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB45_1: # %atomicrmw.start
+; X32-NEXT: .LBB52_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB45_1
+; X32-NEXT: jne .LBB52_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1266,10 +1448,10 @@ define void @inc_64(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB51_1: # %atomicrmw.start
+; X32-NEXT: .LBB58_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB51_1
+; X32-NEXT: jne .LBB58_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1413,10 +1595,10 @@ define void @dec_64(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB56_1: # %atomicrmw.start
+; X32-NEXT: .LBB63_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB56_1
+; X32-NEXT: jne .LBB63_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1545,10 +1727,10 @@ define void @not_64(i64* %p) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB61_1: # %atomicrmw.start
+; X32-NEXT: .LBB68_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB61_1
+; X32-NEXT: jne .LBB68_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 8
@@ -1672,11 +1854,11 @@ define void @neg_64(i64* %p) {
 ; X32-NEXT: movl (%edi), %eax
 ; X32-NEXT: movl 4(%edi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB66_1: # %atomicrmw.start
+; X32-NEXT: .LBB73_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: movl %esi, %ecx
 ; X32-NEXT: lock cmpxchg8b (%edi)
-; X32-NEXT: jne .LBB66_1
+; X32-NEXT: jne .LBB73_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 12
@@ -1784,10 +1966,10 @@ define void @fadd_64r(double* %loc, double %val) {
 ; X32-NEXT: movl (%esi), %eax
 ; X32-NEXT: movl 4(%esi), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB69_1: # %atomicrmw.start
+; X32-NEXT: .LBB76_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esi)
-; X32-NEXT: jne .LBB69_1
+; X32-NEXT: jne .LBB76_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -8(%ebp), %esp
 ; X32-NEXT: popl %esi
@@ -1874,10 +2056,10 @@ define void @fadd_64g() {
 ; X32-NEXT: movl glob64+4, %edx
 ; X32-NEXT: movl glob64, %eax
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB71_1: # %atomicrmw.start
+; X32-NEXT: .LBB78_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b glob64
-; X32-NEXT: jne .LBB71_1
+; X32-NEXT: jne .LBB78_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -4(%ebp), %esp
 ; X32-NEXT: popl %ebx
@@ -1961,10 +2143,10 @@ define void @fadd_64imm() {
 ; X32-NEXT: movl -559038737, %eax
 ; X32-NEXT: movl -559038733, %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB73_1: # %atomicrmw.start
+; X32-NEXT: .LBB80_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b -559038737
-; X32-NEXT: jne .LBB73_1
+; X32-NEXT: jne .LBB80_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -4(%ebp), %esp
 ; X32-NEXT: popl %ebx
@@ -2048,10 +2230,10 @@ define void @fadd_64stack() {
 ; X32-NEXT: movl (%esp), %eax
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB75_1: # %atomicrmw.start
+; X32-NEXT: .LBB82_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%esp)
-; X32-NEXT: jne .LBB75_1
+; X32-NEXT: jne .LBB82_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -4(%ebp), %esp
 ; X32-NEXT: popl %ebx
@@ -2108,10 +2290,10 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) {
 ; X32-NEXT: movl (%edi,%esi,8), %eax
 ; X32-NEXT: movl 4(%edi,%esi,8), %edx
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB76_1: # %atomicrmw.start
+; X32-NEXT: .LBB83_1: # %atomicrmw.start
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
 ; X32-NEXT: lock cmpxchg8b (%edi,%esi,8)
-; X32-NEXT: jne .LBB76_1
+; X32-NEXT: jne .LBB83_1
 ; X32-NEXT: # %bb.2: # %atomicrmw.end
 ; X32-NEXT: leal -12(%ebp), %esp
 ; X32-NEXT: popl %esi
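For reference, a minimal sketch (not part of this diff; the function name @sub_32i_sketch is hypothetical) of the load/sub/store pattern the new tests above exercise, written in the same IR style as the test file. The ordering on the atomic store is what decides the lowering: as the sub_32r and sub_32r_seq_cst checks show, a monotonic or release store can be emitted as a plain mov, while a seq_cst store requires xchg.

; Minimal sketch, assuming the same RUN lines as atomic_mi.ll.
define void @sub_32i_sketch(i32* %p) {
  %1 = load atomic i32, i32* %p acquire, align 4  ; atomic load: must not be duplicated
  %2 = sub i32 %1, 16                             ; plain (non-atomic) subtraction
  store atomic i32 %2, i32* %p release, align 4   ; release store: a plain mov suffices on x86
  ret void
}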