author | Craig Topper <craig.topper@intel.com> | 2018-11-05 19:45:37 +0000
committer | Craig Topper <craig.topper@intel.com> | 2018-11-05 19:45:37 +0000
commit | ab896b08d4f85fb0c452fc51569ca627a83dd64f (patch)
tree | ab654a993c0eeb70426b9f17f887ffae6b378324 /llvm/test/CodeGen/X86
parent | 8a0eb44398e7c34750264344bfc5ce49e922b5a8 (diff)
[X86] Regenerate test checks in preparation for a patch. NFC
I'm preparing a patch to avoid creating critical edges in cmov expansion. Regenerating these tests first makes the changes in the next patch easier to see.
llvm-svn: 346161
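For reference, checks like the ones added below are produced by running the update script over each affected test file. A minimal sketch (the --llc-binary path assumes a local CMake build under build/; with llc on PATH the flag can be omitted):

  llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc llvm/test/CodeGen/X86/atomic32.ll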
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- | llvm/test/CodeGen/X86/MachineSink-eflags.ll | 33
-rw-r--r-- | llvm/test/CodeGen/X86/atomic32.ll | 250
-rw-r--r-- | llvm/test/CodeGen/X86/fdiv-combine.ll | 18
-rw-r--r-- | llvm/test/CodeGen/X86/fp128-compare.ll | 133
-rw-r--r-- | llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll | 39
-rw-r--r-- | llvm/test/CodeGen/X86/pr5145.ll | 85
-rw-r--r-- | llvm/test/CodeGen/X86/pseudo_cmov_lower2.ll | 133
7 files changed, 456 insertions, 235 deletions
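Most of the churn below comes from the script generalizing spill/reload check lines so they match both stack-pointer- and frame-pointer-relative slots, with positive or negative offsets. For example, taken from the atomic32.ll hunks, a check such as

  ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill

becomes

  ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill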
diff --git a/llvm/test/CodeGen/X86/MachineSink-eflags.ll b/llvm/test/CodeGen/X86/MachineSink-eflags.ll
index 4e52c8c5f7d..6302b3be671 100644
--- a/llvm/test/CodeGen/X86/MachineSink-eflags.ll
+++ b/llvm/test/CodeGen/X86/MachineSink-eflags.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-pc-linux"
@@ -11,6 +12,36 @@ target triple = "x86_64-pc-linux"
%5 = type <{ void (i32)*, i8*, i32 (i8*, ...)* }>
define void @foo(i8* nocapture %_stubArgs) nounwind {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq 48(%rdi), %rax
+; CHECK-NEXT: movl 64(%rdi), %edx
+; CHECK-NEXT: movl $200, %esi
+; CHECK-NEXT: addl 68(%rdi), %esi
+; CHECK-NEXT: imull $46, %edx, %ecx
+; CHECK-NEXT: addq %rsi, %rcx
+; CHECK-NEXT: shlq $4, %rcx
+; CHECK-NEXT: imull $47, %edx, %edx
+; CHECK-NEXT: addq %rsi, %rdx
+; CHECK-NEXT: shlq $4, %rdx
+; CHECK-NEXT: movaps (%rax,%rdx), %xmm0
+; CHECK-NEXT: cmpl $0, (%rdi)
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_1:
+; CHECK-NEXT: movaps (%rax,%rcx), %xmm1
+; CHECK-NEXT: .LBB0_3: # %entry
+; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rsp
+; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: jne .LBB0_5
+; CHECK-NEXT: # %bb.4: # %entry
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: .LBB0_5: # %entry
+; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: addq $152, %rsp
+; CHECK-NEXT: retq
entry:
%i0 = alloca i8*, align 8
%i2 = alloca i8*, align 8
@@ -60,8 +91,6 @@ entry:
%cmp432.i = icmp ult i32 %tmp156.i, %tmp1
; %shl.i should not be sinked below the compare.
-; CHECK: cmpl
-; CHECK-NOT: shlq
%cond.i = select i1 %cmp432.i, <2 x double> %tmp162.i, <2 x double> zeroinitializer
store <2 x double> %cond.i, <2 x double>* %ptr4438.i, align 16
diff --git a/llvm/test/CodeGen/X86/atomic32.ll b/llvm/test/CodeGen/X86/atomic32.ll
index 7e2bff4116c..5e78444eea7 100644
--- a/llvm/test/CodeGen/X86/atomic32.ll
+++ b/llvm/test/CodeGen/X86/atomic32.ll
@@ -61,22 +61,22 @@ define void @atomic_fetch_and32() nounwind {
; X64: # %bb.0:
; X64-NEXT: lock andl $3, {{.*}}(%rip)
; X64-NEXT: movl sc32, %eax
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB2_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB2_2
; X64-NEXT: jmp .LBB2_1
; X64-NEXT: .LBB2_2: # %atomicrmw.end
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: lock andl %eax, {{.*}}(%rip)
; X64-NEXT: retq
;
@@ -85,10 +85,10 @@ define void @atomic_fetch_and32() nounwind {
; X86-NEXT: subl $8, %esp
; X86-NEXT: lock andl $3, sc32
; X86-NEXT: movl sc32, %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB2_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
@@ -96,7 +96,7 @@ define void @atomic_fetch_and32() nounwind {
; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB2_2
; X86-NEXT: jmp .LBB2_1
; X86-NEXT: .LBB2_2: # %atomicrmw.end
@@ -115,22 +115,22 @@ define void @atomic_fetch_or32() nounwind {
; X64: # %bb.0:
; X64-NEXT: lock orl $3, {{.*}}(%rip)
; X64-NEXT: movl sc32, %eax
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB3_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: orl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB3_2
; X64-NEXT: jmp .LBB3_1
; X64-NEXT: .LBB3_2: # %atomicrmw.end
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: lock orl %eax, {{.*}}(%rip)
; X64-NEXT: retq
;
@@ -139,10 +139,10 @@ define void @atomic_fetch_or32() nounwind {
; X86-NEXT: subl $8, %esp
; X86-NEXT: lock orl $3, sc32
; X86-NEXT: movl sc32, %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB3_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: orl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
@@ -150,7 +150,7 @@ define void @atomic_fetch_or32() nounwind {
; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB3_2
; X86-NEXT: jmp .LBB3_1
; X86-NEXT: .LBB3_2: # %atomicrmw.end
@@ -169,22 +169,22 @@ define void @atomic_fetch_xor32() nounwind {
; X64: # %bb.0:
; X64-NEXT: lock xorl $3, {{.*}}(%rip)
; X64-NEXT: movl sc32, %eax
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB4_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: xorl $5, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %dl
; X64-NEXT: testb $1, %dl
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB4_2
; X64-NEXT: jmp .LBB4_1
; X64-NEXT: .LBB4_2: # %atomicrmw.end
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: lock xorl %eax, {{.*}}(%rip)
; X64-NEXT: retq
;
@@ -193,10 +193,10 @@ define void @atomic_fetch_xor32() nounwind {
; X86-NEXT: subl $8, %esp
; X86-NEXT: lock xorl $3, sc32
; X86-NEXT: movl sc32, %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: .LBB4_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: xorl $5, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
@@ -204,7 +204,7 @@ define void @atomic_fetch_xor32() nounwind {
; X86-NEXT: testb $1, %dl
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: jne .LBB4_2
; X86-NEXT: jmp .LBB4_1
; X86-NEXT: .LBB4_2: # %atomicrmw.end
@@ -222,19 +222,19 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_nand32:
; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB5_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: andl %edx, %ecx
; X64-NEXT: notl %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB5_2
; X64-NEXT: jmp .LBB5_1
; X64-NEXT: .LBB5_2: # %atomicrmw.end
@@ -246,13 +246,13 @@ define void @atomic_fetch_nand32(i32 %x) nounwind {
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl sc32, %ecx
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT: .LBB5_1: # %atomicrmw.start
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: andl %edx, %ecx
; X86-NEXT: notl %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
@@ -273,20 +273,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_max32:
; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB6_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovgel %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB6_2
; X64-NEXT: jmp .LBB6_1
; X64-NEXT: .LBB6_2: # %atomicrmw.end
@@ -298,20 +298,20 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx
-; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB6_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %ecx
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovgel %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB6_2
; X86-CMOV-NEXT: jmp .LBB6_1
; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end
@@ -326,34 +326,34 @@ define void @atomic_fetch_max32(i32 %x) nounwind {
; X86-NOCMOV-NEXT: subl $24, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB6_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, %ecx
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jge .LBB6_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB6_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-NOCMOV-NEXT: sete %bl
; X86-NOCMOV-NEXT: testb $1, %bl
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB6_2
; X86-NOCMOV-NEXT: jmp .LBB6_1
; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end
@@ -369,20 +369,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_min32:
; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB7_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovlel %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB7_2
; X64-NEXT: jmp .LBB7_1
; X64-NEXT: .LBB7_2: # %atomicrmw.end
@@ -394,20 +394,20 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx
-; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB7_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %ecx
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovlel %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB7_2
; X86-CMOV-NEXT: jmp .LBB7_1
; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end
@@ -422,34 +422,34 @@ define void @atomic_fetch_min32(i32 %x) nounwind {
; X86-NOCMOV-NEXT: subl $24, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB7_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, %ecx
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jle .LBB7_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB7_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-NOCMOV-NEXT: sete %bl
; X86-NOCMOV-NEXT: testb $1, %bl
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB7_2
; X86-NOCMOV-NEXT: jmp .LBB7_1
; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end
@@ -465,20 +465,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_umax32:
; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB8_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmoval %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB8_2
; X64-NEXT: jmp .LBB8_1
; X64-NEXT: .LBB8_2: # %atomicrmw.end
@@ -490,20 +490,20 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx
-; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB8_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %ecx
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmoval %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB8_2
; X86-CMOV-NEXT: jmp .LBB8_1
; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end
@@ -518,34 +518,34 @@ define void @atomic_fetch_umax32(i32 %x) nounwind {
; X86-NOCMOV-NEXT: subl $24, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB8_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, %ecx
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: ja .LBB8_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB8_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-NOCMOV-NEXT: sete %bl
; X86-NOCMOV-NEXT: testb $1, %bl
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB8_2
; X86-NOCMOV-NEXT: jmp .LBB8_1
; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end
@@ -561,20 +561,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_umin32:
; X64: # %bb.0:
; X64-NEXT: movl sc32, %eax
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: .LBB9_1: # %atomicrmw.start
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: movl -{{[0-9]+}}(%rsp), %edx # 4-byte Reload
+; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; X64-NEXT: subl %edx, %ecx
; X64-NEXT: cmovbel %eax, %edx
; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: testb $1, %sil
-; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) # 4-byte Spill
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: jne .LBB9_2
; X64-NEXT: jmp .LBB9_1
; X64-NEXT: .LBB9_2: # %atomicrmw.end
@@ -586,20 +586,20 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X86-CMOV-NEXT: subl $12, %esp
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-CMOV-NEXT: movl sc32, %ecx
-; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-CMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: .LBB9_1: # %atomicrmw.start
; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-CMOV-NEXT: movl %eax, %ecx
-; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-CMOV-NEXT: subl %edx, %ecx
; X86-CMOV-NEXT: cmovbel %eax, %edx
; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-CMOV-NEXT: sete %bl
; X86-CMOV-NEXT: testb $1, %bl
; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-CMOV-NEXT: jne .LBB9_2
; X86-CMOV-NEXT: jmp .LBB9_1
; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end
@@ -614,34 +614,34 @@ define void @atomic_fetch_umin32(i32 %x) nounwind {
; X86-NOCMOV-NEXT: subl $24, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB9_1: # %atomicrmw.start
; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, %ecx
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jbe .LBB9_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: .LBB9_4: # %atomicrmw.start
; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %ecx, %eax
; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32
; X86-NOCMOV-NEXT: sete %bl
; X86-NOCMOV-NEXT: testb $1, %bl
-; X86-NOCMOV-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jne .LBB9_2
; X86-NOCMOV-NEXT: jmp .LBB9_1
; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end
@@ -659,7 +659,7 @@ define void @atomic_fetch_cmpxchg32() nounwind {
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movl $1, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: retq
;
; X86-LABEL: atomic_fetch_cmpxchg32:
@@ -694,7 +694,7 @@ define void @atomic_fetch_swap32(i32 %x) nounwind {
; X64-LABEL: atomic_fetch_swap32:
; X64: # %bb.0:
; X64-NEXT: xchgl %edi, {{.*}}(%rip)
-; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # 4-byte Spill
+; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: retq
;
; X86-LABEL: atomic_fetch_swap32:
diff --git a/llvm/test/CodeGen/X86/fdiv-combine.ll b/llvm/test/CodeGen/X86/fdiv-combine.ll
index 62e86e3ad2c..c0c5baa2c8b 100644
--- a/llvm/test/CodeGen/X86/fdiv-combine.ll
+++ b/llvm/test/CodeGen/X86/fdiv-combine.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s
; More than one 'arcp' division using a single divisor operand
@@ -79,12 +80,12 @@ define float @div2_arcp_partial3(float %x, float %y, float %z) {
}
; If the reciprocal is already calculated, we should not
-; generate an extra multiplication by 1.0. 
+; generate an extra multiplication by 1.0.
define double @div3_arcp(double %x, double %y, double %z) {
; CHECK-LABEL: div3_arcp:
; CHECK: # %bb.0:
-; CHECK-NEXT: movsd{{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: divsd %xmm1, %xmm2
; CHECK-NEXT: mulsd %xmm2, %xmm0
; CHECK-NEXT: addsd %xmm2, %xmm0
@@ -132,9 +133,16 @@ define float @div_select_constant_fold_zero(i1 zeroext %arg) {
define void @PR24141() {
; CHECK-LABEL: PR24141:
-; CHECK: callq
-; CHECK-NEXT: divsd
-; CHECK-NEXT: jmp
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: # implicit-def: $xmm0
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB8_1: # %while.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: callq g
+; CHECK-NEXT: divsd %xmm1, %xmm0
+; CHECK-NEXT: jmp .LBB8_1
entry:
br label %while.body
diff --git a/llvm/test/CodeGen/X86/fp128-compare.ll b/llvm/test/CodeGen/X86/fp128-compare.ll
index 7ee2e90657c..6f2b0c514a8 100644
--- a/llvm/test/CodeGen/X86/fp128-compare.ll
+++ b/llvm/test/CodeGen/X86/fp128-compare.ll
@@ -1,103 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx \
; RUN: -enable-legalize-types-checking | FileCheck %s
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx \
; RUN: -enable-legalize-types-checking | FileCheck %s
define i32 @TestComp128GT(fp128 %d1, fp128 %d2) {
+; CHECK-LABEL: TestComp128GT:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __gttf2
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setg %cl
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%cmp = fcmp ogt fp128 %d1, %d2
%conv = zext i1 %cmp to i32
ret i32 %conv
-; CHECK-LABEL: TestComp128GT:
-; CHECK: callq __gttf2
-; CHECK: xorl %ecx, %ecx
-; CHECK: setg %cl
-; CHECK: movl %ecx, %eax
-; CHECK: retq
}
define i32 @TestComp128GE(fp128 %d1, fp128 %d2) {
+; CHECK-LABEL: TestComp128GE:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __getf2
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setns %cl
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%cmp = fcmp oge fp128 %d1, %d2
%conv = zext i1 %cmp to i32
ret i32 %conv
-; CHECK-LABEL: TestComp128GE:
-; CHECK: callq __getf2
-; CHECK: xorl %ecx, %ecx
-; CHECK: testl %eax, %eax
-; CHECK: setns %cl
-; CHECK: movl %ecx, %eax
-; CHECK: retq
}
define i32 @TestComp128LT(fp128 %d1, fp128 %d2) {
+; CHECK-LABEL: TestComp128LT:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __lttf2
+; CHECK-NEXT: shrl $31, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%cmp = fcmp olt fp128 %d1, %d2
%conv = zext i1 %cmp to i32
ret i32 %conv
-; CHECK-LABEL: TestComp128LT:
-; CHECK: callq __lttf2
-; CHECK-NEXT: shrl $31, %eax
-; CHECK: retq
-;
; The 'shrl' is a special optimization in llvm to combine
; the effect of 'fcmp olt' and 'zext'. The main purpose is
; to test soften call to __lttf2.
}
define i32 @TestComp128LE(fp128 %d1, fp128 %d2) {
+; CHECK-LABEL: TestComp128LE:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __letf2
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setle %cl
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%cmp = fcmp ole fp128 %d1, %d2
%conv = zext i1 %cmp to i32
ret i32 %conv
-; CHECK-LABEL: TestComp128LE:
-; CHECK: callq __letf2
-; CHECK: xorl %ecx, %ecx
-; CHECK: testl %eax, %eax
-; CHECK: setle %cl
-; CHECK: movl %ecx, %eax
-; CHECK: retq
}
define i32 @TestComp128EQ(fp128 %d1, fp128 %d2) {
+; CHECK-LABEL: TestComp128EQ:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __eqtf2
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%cmp = fcmp oeq fp128 %d1, %d2
%conv = zext i1 %cmp to i32
ret i32 %conv
-; CHECK-LABEL: TestComp128EQ:
-; CHECK: callq __eqtf2
-; CHECK: xorl %ecx, %ecx
-; CHECK: testl %eax, %eax
-; CHECK: sete %cl
-; CHECK: movl %ecx, %eax
-; CHECK: retq
}
define i32 @TestComp128NE(fp128 %d1, fp128 %d2) {
+; CHECK-LABEL: TestComp128NE:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq __netf2
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%cmp = fcmp une fp128 %d1, %d2
%conv = zext i1 %cmp to i32
ret i32 %conv
-; CHECK-LABEL: TestComp128NE:
-; CHECK: callq __netf2
-; CHECK: xorl %ecx, %ecx
-; CHECK: testl %eax, %eax
-; CHECK: setne %cl
-; CHECK: movl %ecx, %eax
-; CHECK: retq
}
define fp128 @TestMax(fp128 %x, fp128 %y) {
+; CHECK-LABEL: TestMax:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: callq __gttf2
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: jg .LBB6_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: .LBB6_2: # %entry
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%cmp = fcmp ogt fp128 %x, %y
%cond = select i1 %cmp, fp128 %x, fp128 %y
ret fp128 %cond
-; CHECK-LABEL: TestMax:
-; CHECK: movaps %xmm0
-; CHECK: movaps %xmm1
-; CHECK: callq __gttf2
-; CHECK: movaps {{.*}}, %xmm0
-; CHECK: testl %eax, %eax
-; CHECK: movaps {{.*}}, %xmm0
-; CHECK: retq
}
diff --git a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
index 6369ee4eb0e..c9e8c636186 100644
--- a/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
+++ b/llvm/test/CodeGen/X86/machine-trace-metrics-crash.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s
; The debug info in this test case was causing a crash because machine trace metrics
@@ -6,9 +7,41 @@
; used machine trace metrics.
define void @PR24199() {
-; CHECK-LABEL: PR24199:
-; CHECK: addss %xmm1, %xmm0
-; CHECK: addss %xmm2, %xmm0
+; CHECK-LABEL: PR24199:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_2: # %if.then
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: .LBB0_3: # %if.end
+; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT: callq foo
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 4-byte Reload
+; CHECK-NEXT: # xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: mulss %xmm0, %xmm2
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: addss %xmm1, %xmm0
+; CHECK-NEXT: addss %xmm2, %xmm0
+; CHECK-NEXT: movss %xmm0, (%rax)
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: jne .LBB0_5
+; CHECK-NEXT: # %bb.4: # %if.end
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: .LBB0_5: # %if.end
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: addss %xmm1, %xmm0
+; CHECK-NEXT: callq bar
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
entry:
%i = alloca %struct.A, align 8
diff --git a/llvm/test/CodeGen/X86/pr5145.ll b/llvm/test/CodeGen/X86/pr5145.ll
index 7da7c299791..02e9b4c1593 100644
--- a/llvm/test/CodeGen/X86/pr5145.ll
+++ b/llvm/test/CodeGen/X86/pr5145.ll
@@ -1,31 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s
@sc8 = external global i8
define void @atomic_maxmin_i8() {
-; CHECK: atomic_maxmin_i8
+; CHECK-LABEL: atomic_maxmin_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $4, %al
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: jg .LBB0_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movb $5, %cl
+; CHECK-NEXT: .LBB0_3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.4: # %atomicrmw.end
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_5: # %atomicrmw.start2
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $7, %al
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: jl .LBB0_7
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start2
+; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT: movb $6, %cl
+; CHECK-NEXT: .LBB0_7: # %atomicrmw.start2
+; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_5
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end1
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_9: # %atomicrmw.start8
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $7, %al
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: ja .LBB0_11
+; CHECK-NEXT: # %bb.10: # %atomicrmw.start8
+; CHECK-NEXT: # in Loop: Header=BB0_9 Depth=1
+; CHECK-NEXT: movb $7, %cl
+; CHECK-NEXT: .LBB0_11: # %atomicrmw.start8
+; CHECK-NEXT: # in Loop: Header=BB0_9 Depth=1
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_9
+; CHECK-NEXT: # %bb.12: # %atomicrmw.end7
+; CHECK-NEXT: movb {{.*}}(%rip), %al
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_13: # %atomicrmw.start14
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cmpb $9, %al
+; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: jb .LBB0_15
+; CHECK-NEXT: # %bb.14: # %atomicrmw.start14
+; CHECK-NEXT: # in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT: movb $8, %cl
+; CHECK-NEXT: .LBB0_15: # %atomicrmw.start14
+; CHECK-NEXT: # in Loop: Header=BB0_13 Depth=1
+; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
+; CHECK-NEXT: jne .LBB0_13
+; CHECK-NEXT: # %bb.16: # %atomicrmw.end13
+; CHECK-NEXT: retq
%1 = atomicrmw max i8* @sc8, i8 5 acquire
-; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: jg
-; CHECK: lock cmpxchgb
-; CHECK: jne [[LABEL1]]
%2 = atomicrmw min i8* @sc8, i8 6 acquire
-; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: jl
-; CHECK: lock cmpxchgb
-; CHECK: jne [[LABEL3]]
%3 = atomicrmw umax i8* @sc8, i8 7 acquire
-; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: ja
-; CHECK: lock cmpxchgb
-; CHECK: jne [[LABEL5]]
%4 = atomicrmw umin i8* @sc8, i8 8 acquire
-; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpb
-; CHECK: jb
-; CHECK: lock cmpxchgb
-; CHECK: jne [[LABEL7]]
ret void
}
diff --git a/llvm/test/CodeGen/X86/pseudo_cmov_lower2.ll b/llvm/test/CodeGen/X86/pseudo_cmov_lower2.ll
index 1a61b0b9700..5218e1f0cee 100644
--- a/llvm/test/CodeGen/X86/pseudo_cmov_lower2.ll
+++ b/llvm/test/CodeGen/X86/pseudo_cmov_lower2.ll
@@ -1,14 +1,29 @@
-; RUN: llc < %s -mtriple=x86_64-linux-gnu -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -o - | FileCheck %s
; This test checks that only a single jae gets generated in the final code
; for lowering the CMOV pseudos that get created for this IR. The tricky part
; of this test is that it tests the special PHI operand rewriting code in
; X86TargetLowering::EmitLoweredSelect.
;
-; CHECK-LABEL: foo1:
-; CHECK: jae
-; CHECK-NOT: jae
define double @foo1(float %p1, double %p2, double %p3) nounwind {
+; CHECK-LABEL: foo1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ucomiss %xmm3, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: jae .LBB0_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: addsd %xmm2, %xmm0
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_1:
+; CHECK-NEXT: addsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm2
+; CHECK-NEXT: .LBB0_3: # %entry
+; CHECK-NEXT: subsd %xmm1, %xmm0
+; CHECK-NEXT: addsd %xmm2, %xmm0
+; CHECK-NEXT: retq
entry:
%c1 = fcmp oge float %p1, 0.000000e+00
%d0 = fadd double %p2, 1.25e0
@@ -26,10 +41,24 @@ entry:
; of this test is that it tests the special PHI operand rewriting code in
; X86TargetLowering::EmitLoweredSelect.
;
-; CHECK-LABEL: foo2:
-; CHECK: jae
-; CHECK-NOT: jae
define double @foo2(float %p1, double %p2, double %p3) nounwind {
+; CHECK-LABEL: foo2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ucomiss %xmm3, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: jae .LBB1_1
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: addsd %xmm0, %xmm2
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: movapd %xmm2, %xmm1
+; CHECK-NEXT: jmp .LBB1_3
+; CHECK-NEXT: .LBB1_1:
+; CHECK-NEXT: addsd %xmm1, %xmm0
+; CHECK-NEXT: .LBB1_3: # %entry
+; CHECK-NEXT: subsd %xmm1, %xmm0
+; CHECK-NEXT: addsd %xmm2, %xmm0
+; CHECK-NEXT: retq
entry:
%c1 = fcmp oge float %p1, 0.000000e+00
%d0 = fadd double %p2, 1.25e0
@@ -48,16 +77,17 @@ entry:
; X86TargetLowering::EmitLoweredSelect. It also tests to make sure all
; the operands of the resulting instructions are from the proper places.
;
-; CHECK-LABEL: foo3:
-; CHECK: js
-; CHECK-NOT: js
-; CHECK-LABEL: # %bb.1:
-; CHECK-DAG: movapd %xmm2, %xmm1
-; CHECK-DAG: movapd %xmm2, %xmm0
-; CHECK-LABEL:.LBB2_2:
-; CHECK: divsd %xmm1, %xmm0
-; CHECK: ret
define double @foo3(i32 %p1, double %p2, double %p3,
+; CHECK-LABEL: foo3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: js .LBB2_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm1
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: .LBB2_2: # %entry
+; CHECK-NEXT: divsd %xmm1, %xmm0
+; CHECK-NEXT: retq
double %p4, double %p5) nounwind {
entry:
%c1 = icmp slt i32 %p1, 0
@@ -78,16 +108,17 @@ entry:
; condition code in the second two selects, but we also swap the operands
; of the selects to give the same actual computation.
;
-; CHECK-LABEL: foo4:
-; CHECK: js
-; CHECK-NOT: js
-; CHECK-LABEL: # %bb.1:
-; CHECK-DAG: movapd %xmm2, %xmm1
-; CHECK-DAG: movapd %xmm2, %xmm0
-; CHECK-LABEL:.LBB3_2:
-; CHECK: divsd %xmm1, %xmm0
-; CHECK: ret
define double @foo4(i32 %p1, double %p2, double %p3,
+; CHECK-LABEL: foo4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: js .LBB3_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm1
+; CHECK-NEXT: movapd %xmm2, %xmm0
+; CHECK-NEXT: .LBB3_2: # %entry
+; CHECK-NEXT: divsd %xmm1, %xmm0
+; CHECK-NEXT: retq
double %p4, double %p5) nounwind {
entry:
%c1 = icmp slt i32 %p1, 0
@@ -103,10 +134,24 @@ entry:
; for lowering the CMOV pseudos that get created for this IR. The tricky part
; of this test is that it tests the special code in CodeGenPrepare.
;
-; CHECK-LABEL: foo5:
-; CHECK: jae
-; CHECK-NOT: jae
define double @foo5(float %p1, double %p2, double %p3) nounwind {
+; CHECK-LABEL: foo5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ucomiss %xmm3, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: jae .LBB4_1
+; CHECK-NEXT: # %bb.2: # %select.false
+; CHECK-NEXT: addsd %xmm2, %xmm0
+; CHECK-NEXT: .LBB4_3: # %select.end
+; CHECK-NEXT: subsd %xmm1, %xmm0
+; CHECK-NEXT: addsd %xmm2, %xmm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB4_1:
+; CHECK-NEXT: addsd %xmm0, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: movapd %xmm1, %xmm2
+; CHECK-NEXT: jmp .LBB4_3
entry:
%c1 = fcmp oge float %p1, 0.000000e+00
%d0 = fadd double %p2, 1.25e0
@@ -122,11 +167,35 @@ entry:
; We should expand select instructions into 3 conditional branches as their
; condtions are different.
;
-; CHECK-LABEL: foo6:
-; CHECK: jae
-; CHECK: jae
-; CHECK: jae
define double @foo6(float %p1, double %p2, double %p3) nounwind {
+; CHECK-LABEL: foo6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm3
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: ucomiss %xmm0, %xmm3
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: jae .LBB5_1
+; CHECK-NEXT: # %bb.2: # %select.false
+; CHECK-NEXT: addsd %xmm2, %xmm0
+; CHECK-NEXT: .LBB5_3: # %select.end
+; CHECK-NEXT: ucomiss {{.*}}(%rip), %xmm3
+; CHECK-NEXT: movapd %xmm0, %xmm4
+; CHECK-NEXT: jae .LBB5_5
+; CHECK-NEXT: # %bb.4: # %select.false2
+; CHECK-NEXT: movapd %xmm1, %xmm4
+; CHECK-NEXT: .LBB5_5: # %select.end1
+; CHECK-NEXT: ucomiss {{.*}}(%rip), %xmm3
+; CHECK-NEXT: movapd %xmm4, %xmm1
+; CHECK-NEXT: jae .LBB5_7
+; CHECK-NEXT: # %bb.6: # %select.false4
+; CHECK-NEXT: movapd %xmm2, %xmm1
+; CHECK-NEXT: .LBB5_7: # %select.end3
+; CHECK-NEXT: subsd %xmm4, %xmm0
+; CHECK-NEXT: addsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB5_1:
+; CHECK-NEXT: addsd %xmm1, %xmm0
+; CHECK-NEXT: jmp .LBB5_3
entry:
%c1 = fcmp oge float %p1, 0.000000e+00
%c2 = fcmp oge float %p1, 1.000000e+00
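A check regeneration like this can be sanity-checked by rerunning the affected tests before committing; a sketch, assuming an in-tree CMake build with lit at build/bin:

  build/bin/llvm-lit llvm/test/CodeGen/X86/atomic32.ll llvm/test/CodeGen/X86/pr5145.ll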