Diffstat (limited to 'llvm/test/CodeGen/X86/atomic_mi.ll')
-rw-r--r--  llvm/test/CodeGen/X86/atomic_mi.ll  | 548
1 file changed, 448 insertions(+), 100 deletions(-)
diff --git a/llvm/test/CodeGen/X86/atomic_mi.ll b/llvm/test/CodeGen/X86/atomic_mi.ll
index 7a6204fc893..8ea72fa5f27 100644
--- a/llvm/test/CodeGen/X86/atomic_mi.ll
+++ b/llvm/test/CodeGen/X86/atomic_mi.ll
@@ -14,7 +14,11 @@
; The binary operations currently supported are 'add', 'and', 'or', and 'xor'.
; 'sub' is not supported because it is translated into an addition of the
; negated immediate.
-; Finally, we also check the same kind of pattern for inc/dec
+;
+; We also check the same patterns:
+; - For inc/dec.
+; - For register instead of immediate operands.
+; - For floating point operations.
; seq_cst stores are left as (lock) xchgl, but we try to check every other
; attribute at least once.
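;
; Editorial sketch (not part of this patch): the folding these tests verify
; turns an IR sequence such as
;   %1 = load atomic i32, i32* %p acquire, align 4
;   %2 = add i32 %1, 2
;   store atomic i32 %2, i32* %p release, align 4
; into a single read-modify-write instruction with a memory operand and no
; lock prefix, e.g. roughly 'addl $2, (%rdi)' on x86-64 (register choice
; illustrative), which is what the X64/X32 check lines below look for.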
@@ -25,10 +29,10 @@
; an implicit lock prefix, so making it explicit is not required.
define void @store_atomic_imm_8(i8* %p) {
-; X64-LABEL: store_atomic_imm_8
+; X64-LABEL: store_atomic_imm_8:
; X64: movb
; X64-NOT: movb
-; X32-LABEL: store_atomic_imm_8
+; X32-LABEL: store_atomic_imm_8:
; X32: movb
; X32-NOT: movb
store atomic i8 42, i8* %p release, align 1
@@ -36,10 +40,10 @@ define void @store_atomic_imm_8(i8* %p) {
}
define void @store_atomic_imm_16(i16* %p) {
-; X64-LABEL: store_atomic_imm_16
+; X64-LABEL: store_atomic_imm_16:
; X64: movw
; X64-NOT: movw
-; X32-LABEL: store_atomic_imm_16
+; X32-LABEL: store_atomic_imm_16:
; X32: movw
; X32-NOT: movw
store atomic i16 42, i16* %p monotonic, align 2
@@ -47,12 +51,12 @@ define void @store_atomic_imm_16(i16* %p) {
}
define void @store_atomic_imm_32(i32* %p) {
-; X64-LABEL: store_atomic_imm_32
+; X64-LABEL: store_atomic_imm_32:
; X64: movl
; X64-NOT: movl
; On 32-bit targets, there is an extra movl for each of these functions
; (probably for alignment reasons).
-; X32-LABEL: store_atomic_imm_32
+; X32-LABEL: store_atomic_imm_32:
; X32: movl 4(%esp), %eax
; X32: movl
; X32-NOT: movl
@@ -61,12 +65,12 @@ define void @store_atomic_imm_32(i32* %p) {
}
define void @store_atomic_imm_64(i64* %p) {
-; X64-LABEL: store_atomic_imm_64
+; X64-LABEL: store_atomic_imm_64:
; X64: movq
; X64-NOT: movq
; These are implemented with a CAS loop on 32-bit architectures, and thus
; cannot be optimized in the same way as the others.
-; X32-LABEL: store_atomic_imm_64
+; X32-LABEL: store_atomic_imm_64:
; X32: cmpxchg8b
store atomic i64 42, i64* %p release, align 8
ret void
@@ -75,7 +79,7 @@ define void @store_atomic_imm_64(i64* %p) {
; If an immediate is too big to fit in 32 bits, it cannot be stored in one mov;
; even on X64, one must use movabsq, which can only target a register.
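; Editorial sketch (not part of this patch): the expected lowering is roughly
;   movabsq $100000000000, %rax
;   movq    %rax, (%rdi)
; (registers illustrative), i.e. the immediate is materialized in a register
; first and then stored with a plain movq.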
define void @store_atomic_imm_64_big(i64* %p) {
-; X64-LABEL: store_atomic_imm_64_big
+; X64-LABEL: store_atomic_imm_64_big:
; X64: movabsq
; X64: movq
store atomic i64 100000000000, i64* %p monotonic, align 8
@@ -84,9 +88,9 @@ define void @store_atomic_imm_64_big(i64* %p) {
; It would be incorrect to replace a lock xchgl by a movl
define void @store_atomic_imm_32_seq_cst(i32* %p) {
-; X64-LABEL: store_atomic_imm_32_seq_cst
+; X64-LABEL: store_atomic_imm_32_seq_cst:
; X64: xchgl
-; X32-LABEL: store_atomic_imm_32_seq_cst
+; X32-LABEL: store_atomic_imm_32_seq_cst:
; X32: xchgl
store atomic i32 42, i32* %p seq_cst, align 4
ret void
@@ -94,12 +98,12 @@ define void @store_atomic_imm_32_seq_cst(i32* %p) {
; ----- ADD -----
-define void @add_8(i8* %p) {
-; X64-LABEL: add_8
+define void @add_8i(i8* %p) {
+; X64-LABEL: add_8i:
; X64-NOT: lock
; X64: addb
; X64-NOT: movb
-; X32-LABEL: add_8
+; X32-LABEL: add_8i:
; X32-NOT: lock
; X32: addb
; X32-NOT: movb
@@ -109,12 +113,27 @@ define void @add_8(i8* %p) {
ret void
}
-define void @add_16(i16* %p) {
+define void @add_8r(i8* %p, i8 %v) {
+; X64-LABEL: add_8r:
+; X64-NOT: lock
+; X64: addb
+; X64-NOT: movb
+; X32-LABEL: add_8r:
+; X32-NOT: lock
+; X32: addb
+; X32-NOT: movb
+ %1 = load atomic i8, i8* %p seq_cst, align 1
+ %2 = add i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @add_16i(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: add_16
+; X64-LABEL: add_16i:
; X64-NOT: addw
-; X32-LABEL: add_16
+; X32-LABEL: add_16i:
; X32-NOT: addw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 2
@@ -122,12 +141,25 @@ define void @add_16(i16* %p) {
ret void
}
-define void @add_32(i32* %p) {
-; X64-LABEL: add_32
+define void @add_16r(i16* %p, i16 %v) {
+; Currently the transformation is not done on 16-bit accesses, as the backend
+; treats 16-bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: add_16r:
+; X64-NOT: addw
+; X32-LABEL: add_16r:
+; X32-NOT: addw [.*], (
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = add i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @add_32i(i32* %p) {
+; X64-LABEL: add_32i:
; X64-NOT: lock
; X64: addl
; X64-NOT: movl
-; X32-LABEL: add_32
+; X32-LABEL: add_32i:
; X32-NOT: lock
; X32: addl
; X32-NOT: movl
@@ -137,23 +169,94 @@ define void @add_32(i32* %p) {
ret void
}
-define void @add_64(i64* %p) {
-; X64-LABEL: add_64
+define void @add_32r(i32* %p, i32 %v) {
+; X64-LABEL: add_32r:
+; X64-NOT: lock
+; X64: addl
+; X64-NOT: movl
+; X32-LABEL: add_32r:
+; X32-NOT: lock
+; X32: addl
+; X32-NOT: movl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = add i32 %1, %v
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret void
+}
+
+; The following is a corner case where the load is added to itself. The pattern
+; matching should not fold this. We only test with 32-bit add, but the same
+; applies to other sizes and operations.
+define void @add_32r_self(i32* %p) {
+; X64-LABEL: add_32r_self:
+; X64-NOT: lock
+; X64: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
+; X64: addl %[[R]], %[[R]]
+; X64: movl %[[R]], (%[[M]])
+; X32-LABEL: add_32r_self:
+; X32-NOT: lock
+; X32: movl (%[[M:[a-z]+]]), %[[R:[a-z]+]]
+; X32: addl %[[R]], %[[R]]
+; X32: movl %[[R]], (%[[M]])
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = add i32 %1, %1
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret void
+}
+
+; The following is a corner case where the load's result is returned. The
+; optimizer isn't allowed to duplicate the load because it's atomic.
+define i32 @add_32r_ret_load(i32* %p, i32 %v) {
+; X64-LABEL: add_32r_ret_load:
+; X64-NOT: lock
+; X64: movl (%rdi), %eax
+; X64-NEXT: leal (%rsi,%rax), %ecx
+; X64-NEXT: movl %ecx, (%rdi)
+; X64-NEXT: retq
+; X32-LABEL: add_32r_ret_load:
+; X32-NOT: lock
+; X32: movl 4(%esp), %[[P:[a-z]+]]
+; X32-NEXT: movl (%[[P]]),
+; X32-NOT: %[[P]]
+; More code here; we just don't want it to load from P.
+; X32: movl %{{.*}}, (%[[P]])
+; X32-NEXT: retl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = add i32 %1, %v
+ store atomic i32 %2, i32* %p monotonic, align 4
+ ret i32 %1
+}
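; Editorial sketch (not part of this patch): the transform that must NOT happen
; here would rematerialize the atomic load for the return value, roughly
;   %a = load atomic i32, i32* %p acquire, align 4   ; feeds the add/store
;   ...
;   %b = load atomic i32, i32* %p acquire, align 4   ; duplicate reload
;   ret i32 %b
; Another thread may store to %p between the two loads, so %b could differ
; from %a; hence the X32-NOT check above, which forbids any further use of
; %[[P]] before the final store.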
+
+define void @add_64i(i64* %p) {
+; X64-LABEL: add_64i:
; X64-NOT: lock
; X64: addq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'addq'.
-; X32-LABEL: add_64
+; X32-LABEL: add_64i:
%1 = load atomic i64, i64* %p acquire, align 8
%2 = add i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
-define void @add_32_seq_cst(i32* %p) {
-; X64-LABEL: add_32_seq_cst
+define void @add_64r(i64* %p, i64 %v) {
+; X64-LABEL: add_64r:
+; X64-NOT: lock
+; X64: addq
+; X64-NOT: movq
+; We do not check X86-32 as it cannot do 'addq'.
+; X32-LABEL: add_64r:
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = add i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @add_32i_seq_cst(i32* %p) {
+; X64-LABEL: add_32i_seq_cst:
; X64: xchgl
-; X32-LABEL: add_32_seq_cst
+; X32-LABEL: add_32i_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, 2
@@ -161,14 +264,25 @@ define void @add_32_seq_cst(i32* %p) {
ret void
}
+define void @add_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: add_32r_seq_cst:
+; X64: xchgl
+; X32-LABEL: add_32r_seq_cst:
+; X32: xchgl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = add i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
; ----- AND -----
-define void @and_8(i8* %p) {
-; X64-LABEL: and_8
+define void @and_8i(i8* %p) {
+; X64-LABEL: and_8i:
; X64-NOT: lock
; X64: andb
; X64-NOT: movb
-; X32-LABEL: and_8
+; X32-LABEL: and_8i:
; X32-NOT: lock
; X32: andb
; X32-NOT: movb
@@ -178,12 +292,27 @@ define void @and_8(i8* %p) {
ret void
}
-define void @and_16(i16* %p) {
+define void @and_8r(i8* %p, i8 %v) {
+; X64-LABEL: and_8r:
+; X64-NOT: lock
+; X64: andb
+; X64-NOT: movb
+; X32-LABEL: and_8r:
+; X32-NOT: lock
+; X32: andb
+; X32-NOT: movb
+ %1 = load atomic i8, i8* %p monotonic, align 1
+ %2 = and i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @and_16i(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: and_16
+; X64-LABEL: and_16i:
; X64-NOT: andw
-; X32-LABEL: and_16
+; X32-LABEL: and_16i:
; X32-NOT: andw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = and i16 %1, 2
@@ -191,12 +320,25 @@ define void @and_16(i16* %p) {
ret void
}
-define void @and_32(i32* %p) {
-; X64-LABEL: and_32
+define void @and_16r(i16* %p, i16 %v) {
+; Currently the transformation is not done on 16-bit accesses, as the backend
+; treats 16-bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: and_16r:
+; X64-NOT: andw
+; X32-LABEL: and_16r:
+; X32-NOT: andw [.*], (
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = and i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @and_32i(i32* %p) {
+; X64-LABEL: and_32i:
; X64-NOT: lock
; X64: andl
; X64-NOT: movl
-; X32-LABEL: and_32
+; X32-LABEL: and_32i:
; X32-NOT: lock
; X32: andl
; X32-NOT: movl
@@ -206,23 +348,51 @@ define void @and_32(i32* %p) {
ret void
}
-define void @and_64(i64* %p) {
-; X64-LABEL: and_64
+define void @and_32r(i32* %p, i32 %v) {
+; X64-LABEL: and_32r:
+; X64-NOT: lock
+; X64: andl
+; X64-NOT: movl
+; X32-LABEL: and_32r:
+; X32-NOT: lock
+; X32: andl
+; X32-NOT: movl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = and i32 %1, %v
+ store atomic i32 %2, i32* %p release, align 4
+ ret void
+}
+
+define void @and_64i(i64* %p) {
+; X64-LABEL: and_64i:
; X64-NOT: lock
; X64: andq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'andq'.
-; X32-LABEL: and_64
+; X32-LABEL: and_64i:
%1 = load atomic i64, i64* %p acquire, align 8
%2 = and i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
-define void @and_32_seq_cst(i32* %p) {
-; X64-LABEL: and_32_seq_cst
+define void @and_64r(i64* %p, i64 %v) {
+; X64-LABEL: and_64r:
+; X64-NOT: lock
+; X64: andq
+; X64-NOT: movq
+; We do not check X86-32 as it cannot do 'andq'.
+; X32-LABEL: and_64r:
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = and i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @and_32i_seq_cst(i32* %p) {
+; X64-LABEL: and_32i_seq_cst:
; X64: xchgl
-; X32-LABEL: and_32_seq_cst
+; X32-LABEL: and_32i_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = and i32 %1, 2
@@ -230,14 +400,25 @@ define void @and_32_seq_cst(i32* %p) {
ret void
}
+define void @and_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: and_32r_seq_cst:
+; X64: xchgl
+; X32-LABEL: and_32r_seq_cst:
+; X32: xchgl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = and i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
; ----- OR -----
-define void @or_8(i8* %p) {
-; X64-LABEL: or_8
+define void @or_8i(i8* %p) {
+; X64-LABEL: or_8i:
; X64-NOT: lock
; X64: orb
; X64-NOT: movb
-; X32-LABEL: or_8
+; X32-LABEL: or_8i:
; X32-NOT: lock
; X32: orb
; X32-NOT: movb
@@ -247,10 +428,25 @@ define void @or_8(i8* %p) {
ret void
}
-define void @or_16(i16* %p) {
-; X64-LABEL: or_16
+define void @or_8r(i8* %p, i8 %v) {
+; X64-LABEL: or_8r:
+; X64-NOT: lock
+; X64: orb
+; X64-NOT: movb
+; X32-LABEL: or_8r:
+; X32-NOT: lock
+; X32: orb
+; X32-NOT: movb
+ %1 = load atomic i8, i8* %p acquire, align 1
+ %2 = or i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @or_16i(i16* %p) {
+; X64-LABEL: or_16i:
; X64-NOT: orw
-; X32-LABEL: or_16
+; X32-LABEL: or_16i:
; X32-NOT: orw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = or i16 %1, 2
@@ -258,12 +454,23 @@ define void @or_16(i16* %p) {
ret void
}
-define void @or_32(i32* %p) {
-; X64-LABEL: or_32
+define void @or_16r(i16* %p, i16 %v) {
+; X64-LABEL: or_16r:
+; X64-NOT: orw
+; X32-LABEL: or_16r:
+; X32-NOT: orw [.*], (
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = or i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @or_32i(i32* %p) {
+; X64-LABEL: or_32i:
; X64-NOT: lock
; X64: orl
; X64-NOT: movl
-; X32-LABEL: or_32
+; X32-LABEL: or_32i:
; X32-NOT: lock
; X32: orl
; X32-NOT: movl
@@ -273,23 +480,51 @@ define void @or_32(i32* %p) {
ret void
}
-define void @or_64(i64* %p) {
-; X64-LABEL: or_64
+define void @or_32r(i32* %p, i32 %v) {
+; X64-LABEL: or_32r:
+; X64-NOT: lock
+; X64: orl
+; X64-NOT: movl
+; X32-LABEL: or_32r:
+; X32-NOT: lock
+; X32: orl
+; X32-NOT: movl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = or i32 %1, %v
+ store atomic i32 %2, i32* %p release, align 4
+ ret void
+}
+
+define void @or_64i(i64* %p) {
+; X64-LABEL: or_64i:
; X64-NOT: lock
; X64: orq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'orq'.
-; X32-LABEL: or_64
+; X32-LABEL: or_64i:
%1 = load atomic i64, i64* %p acquire, align 8
%2 = or i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
-define void @or_32_seq_cst(i32* %p) {
-; X64-LABEL: or_32_seq_cst
+define void @or_64r(i64* %p, i64 %v) {
+; X64-LABEL: or_64r:
+; X64-NOT: lock
+; X64: orq
+; X64-NOT: movq
+; We do not check X86-32 as it cannot do 'orq'.
+; X32-LABEL: or_64r:
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = or i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @or_32i_seq_cst(i32* %p) {
+; X64-LABEL: or_32i_seq_cst:
; X64: xchgl
-; X32-LABEL: or_32_seq_cst
+; X32-LABEL: or_32i_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = or i32 %1, 2
@@ -297,14 +532,25 @@ define void @or_32_seq_cst(i32* %p) {
ret void
}
+define void @or_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: or_32r_seq_cst:
+; X64: xchgl
+; X32-LABEL: or_32r_seq_cst:
+; X32: xchgl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = or i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
; ----- XOR -----
-define void @xor_8(i8* %p) {
-; X64-LABEL: xor_8
+define void @xor_8i(i8* %p) {
+; X64-LABEL: xor_8i:
; X64-NOT: lock
; X64: xorb
; X64-NOT: movb
-; X32-LABEL: xor_8
+; X32-LABEL: xor_8i:
; X32-NOT: lock
; X32: xorb
; X32-NOT: movb
@@ -314,10 +560,25 @@ define void @xor_8(i8* %p) {
ret void
}
-define void @xor_16(i16* %p) {
-; X64-LABEL: xor_16
+define void @xor_8r(i8* %p, i8 %v) {
+; X64-LABEL: xor_8r:
+; X64-NOT: lock
+; X64: xorb
+; X64-NOT: movb
+; X32-LABEL: xor_8r:
+; X32-NOT: lock
+; X32: xorb
+; X32-NOT: movb
+ %1 = load atomic i8, i8* %p acquire, align 1
+ %2 = xor i8 %1, %v
+ store atomic i8 %2, i8* %p release, align 1
+ ret void
+}
+
+define void @xor_16i(i16* %p) {
+; X64-LABEL: xor_16i:
; X64-NOT: xorw
-; X32-LABEL: xor_16
+; X32-LABEL: xor_16i:
; X32-NOT: xorw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = xor i16 %1, 2
@@ -325,12 +586,23 @@ define void @xor_16(i16* %p) {
ret void
}
-define void @xor_32(i32* %p) {
-; X64-LABEL: xor_32
+define void @xor_16r(i16* %p, i16 %v) {
+; X64-LABEL: xor_16r:
+; X64-NOT: xorw
+; X32-LABEL: xor_16r:
+; X32-NOT: xorw [.*], (
+ %1 = load atomic i16, i16* %p acquire, align 2
+ %2 = xor i16 %1, %v
+ store atomic i16 %2, i16* %p release, align 2
+ ret void
+}
+
+define void @xor_32i(i32* %p) {
+; X64-LABEL: xor_32i:
; X64-NOT: lock
; X64: xorl
; X64-NOT: movl
-; X32-LABEL: xor_32
+; X32-LABEL: xor_32i:
; X32-NOT: lock
; X32: xorl
; X32-NOT: movl
@@ -340,23 +612,51 @@ define void @xor_32(i32* %p) {
ret void
}
-define void @xor_64(i64* %p) {
-; X64-LABEL: xor_64
+define void @xor_32r(i32* %p, i32 %v) {
+; X64-LABEL: xor_32r:
+; X64-NOT: lock
+; X64: xorl
+; X64-NOT: movl
+; X32-LABEL: xor_32r:
+; X32-NOT: lock
+; X32: xorl
+; X32-NOT: movl
+ %1 = load atomic i32, i32* %p acquire, align 4
+ %2 = xor i32 %1, %v
+ store atomic i32 %2, i32* %p release, align 4
+ ret void
+}
+
+define void @xor_64i(i64* %p) {
+; X64-LABEL: xor_64i:
; X64-NOT: lock
; X64: xorq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'xorq'.
-; X32-LABEL: xor_64
+; X32-LABEL: xor_64i:
%1 = load atomic i64, i64* %p acquire, align 8
%2 = xor i64 %1, 2
store atomic i64 %2, i64* %p release, align 8
ret void
}
-define void @xor_32_seq_cst(i32* %p) {
-; X64-LABEL: xor_32_seq_cst
+define void @xor_64r(i64* %p, i64 %v) {
+; X64-LABEL: xor_64r:
+; X64-NOT: lock
+; X64: xorq
+; X64-NOT: movq
+; We do not check X86-32 as it cannot do 'xorq'.
+; X32-LABEL: xor_64r:
+ %1 = load atomic i64, i64* %p acquire, align 8
+ %2 = xor i64 %1, %v
+ store atomic i64 %2, i64* %p release, align 8
+ ret void
+}
+
+define void @xor_32i_seq_cst(i32* %p) {
+; X64-LABEL: xor_32i_seq_cst:
; X64: xchgl
-; X32-LABEL: xor_32_seq_cst
+; X32-LABEL: xor_32i_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = xor i32 %1, 2
@@ -364,18 +664,29 @@ define void @xor_32_seq_cst(i32* %p) {
ret void
}
+define void @xor_32r_seq_cst(i32* %p, i32 %v) {
+; X64-LABEL: xor_32r_seq_cst:
+; X64: xchgl
+; X32-LABEL: xor_32r_seq_cst:
+; X32: xchgl
+ %1 = load atomic i32, i32* %p monotonic, align 4
+ %2 = xor i32 %1, %v
+ store atomic i32 %2, i32* %p seq_cst, align 4
+ ret void
+}
+
; ----- INC -----
define void @inc_8(i8* %p) {
-; X64-LABEL: inc_8
+; X64-LABEL: inc_8:
; X64-NOT: lock
; X64: incb
; X64-NOT: movb
-; X32-LABEL: inc_8
+; X32-LABEL: inc_8:
; X32-NOT: lock
; X32: incb
; X32-NOT: movb
-; SLOW_INC-LABEL: inc_8
+; SLOW_INC-LABEL: inc_8:
; SLOW_INC-NOT: incb
; SLOW_INC-NOT: movb
%1 = load atomic i8, i8* %p seq_cst, align 1
@@ -387,11 +698,11 @@ define void @inc_8(i8* %p) {
define void @inc_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: inc_16
+; X64-LABEL: inc_16:
; X64-NOT: incw
-; X32-LABEL: inc_16
+; X32-LABEL: inc_16:
; X32-NOT: incw
-; SLOW_INC-LABEL: inc_16
+; SLOW_INC-LABEL: inc_16:
; SLOW_INC-NOT: incw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = add i16 %1, 1
@@ -400,15 +711,15 @@ define void @inc_16(i16* %p) {
}
define void @inc_32(i32* %p) {
-; X64-LABEL: inc_32
+; X64-LABEL: inc_32:
; X64-NOT: lock
; X64: incl
; X64-NOT: movl
-; X32-LABEL: inc_32
+; X32-LABEL: inc_32:
; X32-NOT: lock
; X32: incl
; X32-NOT: movl
-; SLOW_INC-LABEL: inc_32
+; SLOW_INC-LABEL: inc_32:
; SLOW_INC-NOT: incl
; SLOW_INC-NOT: movl
%1 = load atomic i32, i32* %p acquire, align 4
@@ -418,13 +729,13 @@ define void @inc_32(i32* %p) {
}
define void @inc_64(i64* %p) {
-; X64-LABEL: inc_64
+; X64-LABEL: inc_64:
; X64-NOT: lock
; X64: incq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'incq'.
-; X32-LABEL: inc_64
-; SLOW_INC-LABEL: inc_64
+; X32-LABEL: inc_64:
+; SLOW_INC-LABEL: inc_64:
; SLOW_INC-NOT: incq
; SLOW_INC-NOT: movq
%1 = load atomic i64, i64* %p acquire, align 8
@@ -434,9 +745,9 @@ define void @inc_64(i64* %p) {
}
define void @inc_32_seq_cst(i32* %p) {
-; X64-LABEL: inc_32_seq_cst
+; X64-LABEL: inc_32_seq_cst:
; X64: xchgl
-; X32-LABEL: inc_32_seq_cst
+; X32-LABEL: inc_32_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = add i32 %1, 1
@@ -447,15 +758,15 @@ define void @inc_32_seq_cst(i32* %p) {
; ----- DEC -----
define void @dec_8(i8* %p) {
-; X64-LABEL: dec_8
+; X64-LABEL: dec_8:
; X64-NOT: lock
; X64: decb
; X64-NOT: movb
-; X32-LABEL: dec_8
+; X32-LABEL: dec_8:
; X32-NOT: lock
; X32: decb
; X32-NOT: movb
-; SLOW_INC-LABEL: dec_8
+; SLOW_INC-LABEL: dec_8:
; SLOW_INC-NOT: decb
; SLOW_INC-NOT: movb
%1 = load atomic i8, i8* %p seq_cst, align 1
@@ -467,11 +778,11 @@ define void @dec_8(i8* %p) {
define void @dec_16(i16* %p) {
; Currently the transformation is not done on 16-bit accesses, as the backend
; treats 16-bit arithmetic as expensive on X86/X86_64.
-; X64-LABEL: dec_16
+; X64-LABEL: dec_16:
; X64-NOT: decw
-; X32-LABEL: dec_16
+; X32-LABEL: dec_16:
; X32-NOT: decw
-; SLOW_INC-LABEL: dec_16
+; SLOW_INC-LABEL: dec_16:
; SLOW_INC-NOT: decw
%1 = load atomic i16, i16* %p acquire, align 2
%2 = sub i16 %1, 1
@@ -480,15 +791,15 @@ define void @dec_16(i16* %p) {
}
define void @dec_32(i32* %p) {
-; X64-LABEL: dec_32
+; X64-LABEL: dec_32:
; X64-NOT: lock
; X64: decl
; X64-NOT: movl
-; X32-LABEL: dec_32
+; X32-LABEL: dec_32:
; X32-NOT: lock
; X32: decl
; X32-NOT: movl
-; SLOW_INC-LABEL: dec_32
+; SLOW_INC-LABEL: dec_32:
; SLOW_INC-NOT: decl
; SLOW_INC-NOT: movl
%1 = load atomic i32, i32* %p acquire, align 4
@@ -498,13 +809,13 @@ define void @dec_32(i32* %p) {
}
define void @dec_64(i64* %p) {
-; X64-LABEL: dec_64
+; X64-LABEL: dec_64:
; X64-NOT: lock
; X64: decq
; X64-NOT: movq
; We do not check X86-32 as it cannot do 'decq'.
-; X32-LABEL: dec_64
-; SLOW_INC-LABEL: dec_64
+; X32-LABEL: dec_64:
+; SLOW_INC-LABEL: dec_64:
; SLOW_INC-NOT: decq
; SLOW_INC-NOT: movq
%1 = load atomic i64, i64* %p acquire, align 8
@@ -514,12 +825,49 @@ define void @dec_64(i64* %p) {
}
define void @dec_32_seq_cst(i32* %p) {
-; X64-LABEL: dec_32_seq_cst
+; X64-LABEL: dec_32_seq_cst:
; X64: xchgl
-; X32-LABEL: dec_32_seq_cst
+; X32-LABEL: dec_32_seq_cst:
; X32: xchgl
%1 = load atomic i32, i32* %p monotonic, align 4
%2 = sub i32 %1, 1
store atomic i32 %2, i32* %p seq_cst, align 4
ret void
}
+
+; ----- FADD -----
+
+define void @fadd_32r(float* %loc, float %val) {
+; X64-LABEL: fadd_32r:
+; X64-NOT: lock
+; X64-NOT: mov
+; X64: addss (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: movss %[[XMM]], (%[[M]])
+; X32-LABEL: fadd_32r:
+; Don't check x86-32.
+; LLVM's SSE handling is conservative on x86-32 even without using atomics.
+ %floc = bitcast float* %loc to i32*
+ %1 = load atomic i32, i32* %floc seq_cst, align 4
+ %2 = bitcast i32 %1 to float
+ %add = fadd float %2, %val
+ %3 = bitcast float %add to i32
+ store atomic i32 %3, i32* %floc release, align 4
+ ret void
+}
+
+define void @fadd_64r(double* %loc, double %val) {
+; X64-LABEL: fadd_64r:
+; X64-NOT: lock
+; X64-NOT: mov
+; X64: addsd (%[[M:[a-z]+]]), %[[XMM:xmm[0-9]+]]
+; X64-NEXT: movsd %[[XMM]], (%[[M]])
+; X32-LABEL: fadd_64r:
+; Don't check x86-32 (see comment above).
+ %floc = bitcast double* %loc to i64*
+ %1 = load atomic i64, i64* %floc seq_cst, align 8
+ %2 = bitcast i64 %1 to double
+ %add = fadd double %2, %val
+ %3 = bitcast double %add to i64
+ store atomic i64 %3, i64* %floc release, align 8
+ ret void
+}
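;
; Editorial sketch (not part of this patch): the two FADD tests above express
; the atomic float/double accesses by round-tripping through an integer type,
; e.g. for the 32-bit case
;   %i  = load atomic i32, i32* %floc seq_cst, align 4
;   %f  = bitcast i32 %i to float
;   %s  = fadd float %f, %val
;   %i2 = bitcast float %s to i32
;   store atomic i32 %i2, i32* %floc release, align 4
; and the backend is expected to fold this into an 'addss' with a memory
; operand followed by a plain 'movss' store, with no lock prefix, as the X64
; check lines above verify.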