author     Hans Wennborg <hans@hanshq.net>   2015-11-19 16:35:08 +0000
committer  Hans Wennborg <hans@hanshq.net>   2015-11-19 16:35:08 +0000
commit     dcc2500452746939988c613e2b6d00513dc2ab3e (patch)
tree       e481c9d219e1b7274e06ecf50e083fb93a287673 /llvm/test
parent     768579c409cde8517e70867df5783402e8da8d08 (diff)
X86: More efficient legalization of wide integer compares
In particular, this makes the code for 64-bit compares on 32-bit targets
much more efficient.
Example:
define i32 @test_slt(i64 %a, i64 %b) {
entry:
  %cmp = icmp slt i64 %a, %b
  br i1 %cmp, label %bb1, label %bb2
bb1:
  ret i32 1
bb2:
  ret i32 2
}
Before this patch:
test_slt:
        movl 4(%esp), %eax
        movl 8(%esp), %ecx
        cmpl 12(%esp), %eax
        setae %al
        cmpl 16(%esp), %ecx
        setge %cl
        je .LBB2_2
        movb %cl, %al
.LBB2_2:
        testb %al, %al
        jne .LBB2_4
        movl $1, %eax
        retl
.LBB2_4:
        movl $2, %eax
        retl
After this patch:
test_slt:
        movl 4(%esp), %eax
        movl 8(%esp), %ecx
        cmpl 12(%esp), %eax
        sbbl 16(%esp), %ecx
        jge .LBB1_2
        movl $1, %eax
        retl
.LBB1_2:
        movl $2, %eax
        retl
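The flags from cmpl/sbbl on the two halves are exactly the flags of a full
64-bit subtraction: cmpl leaves the low-word borrow in the carry flag, sbbl
folds that borrow into the high-word subtraction, and a single signed branch
(jl/jge) reads the result, so no setcc/test chain is needed. Below is a
minimal C sketch of that identity, for illustration only; the helper name,
the word splitting, and the main() checks are not part of the patch.

/* Illustrative sketch only -- not from the patch. Models the cmpl/sbbl
 * sequence emitted for a signed 64-bit "a < b" on a 32-bit target. */
#include <assert.h>
#include <stdint.h>

static int slt_via_borrow(int64_t a, int64_t b) {
    uint32_t a_lo = (uint32_t)a;                  /* low words  */
    uint32_t b_lo = (uint32_t)b;
    int32_t  a_hi = (int32_t)((uint64_t)a >> 32); /* high words */
    int32_t  b_hi = (int32_t)((uint64_t)b >> 32);

    int borrow = a_lo < b_lo;                     /* cmpl: sets the carry flag */
    /* sbbl: high-word subtraction including the borrow. Done in 64-bit
     * arithmetic it cannot overflow, so its sign matches the signed
     * less-than condition (SF != OF) that jl tests and jge inverts. */
    int64_t hi = (int64_t)a_hi - (int64_t)b_hi - borrow;
    return hi < 0;
}

int main(void) {
    assert(slt_via_borrow(-1, 0) == 1);
    assert(slt_via_borrow(0, -1) == 0);
    assert(slt_via_borrow(INT64_MIN, INT64_MAX) == 1);
    assert(slt_via_borrow(5, 5) == 0);
    return 0;
}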
Differential Revision: http://reviews.llvm.org/D14496
llvm-svn: 253572
Diffstat (limited to 'llvm/test')
 llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll |   3
 llvm/test/CodeGen/X86/atomic-minmax-i6432.ll        |   8
 llvm/test/CodeGen/X86/atomic128.ll                  |  52
 llvm/test/CodeGen/X86/avx512-cmp.ll                 |  25
 llvm/test/CodeGen/X86/wide-integer-cmp.ll           | 130
 llvm/test/CodeGen/X86/win32-pic-jumptable.ll        |   4
 6 files changed, 149 insertions(+), 73 deletions(-)
diff --git a/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
index a19aa52f302..816577be15e 100644
--- a/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
+++ b/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
@@ -26,6 +26,5 @@ if.end: ; preds = %if.then, %entry
   ret void
 
 ; CHECK-LABEL: fn1:
-; CHECK: shrq $32, [[REG:%.*]]
-; CHECK: sete
+; CHECK: jb
 }
diff --git a/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll b/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll
index 4989bc14ef8..d5d3fa6db5e 100644
--- a/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll
+++ b/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll
@@ -8,7 +8,7 @@ define void @atomic_maxmin_i6432() {
   %1 = atomicrmw max i64* @sc64, i64 5 acquire
 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
 ; LINUX: cmpl
-; LINUX: seta
+; LINUX: sbbl
 ; LINUX: cmovne
 ; LINUX: cmovne
 ; LINUX: lock cmpxchg8b
@@ -16,7 +16,7 @@ define void @atomic_maxmin_i6432() {
   %2 = atomicrmw min i64* @sc64, i64 6 acquire
 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
 ; LINUX: cmpl
-; LINUX: setb
+; LINUX: sbbl
 ; LINUX: cmovne
 ; LINUX: cmovne
 ; LINUX: lock cmpxchg8b
@@ -24,7 +24,7 @@ define void @atomic_maxmin_i6432() {
   %3 = atomicrmw umax i64* @sc64, i64 7 acquire
 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
 ; LINUX: cmpl
-; LINUX: seta
+; LINUX: sbbl
 ; LINUX: cmovne
 ; LINUX: cmovne
 ; LINUX: lock cmpxchg8b
@@ -32,7 +32,7 @@ define void @atomic_maxmin_i6432() {
   %4 = atomicrmw umin i64* @sc64, i64 8 acquire
 ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
 ; LINUX: cmpl
-; LINUX: setb
+; LINUX: sbbl
 ; LINUX: cmovne
 ; LINUX: cmovne
 ; LINUX: lock cmpxchg8b
diff --git a/llvm/test/CodeGen/X86/atomic128.ll b/llvm/test/CodeGen/X86/atomic128.ll
index dea7d482f98..c41269b0b60 100644
--- a/llvm/test/CodeGen/X86/atomic128.ll
+++ b/llvm/test/CodeGen/X86/atomic128.ll
@@ -119,16 +119,9 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
 ; CHECK-DAG: movq 8(%rdi), %rdx
 
 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setbe [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setle [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setg
 ; CHECK: cmovneq %rax, %rbx
 ; CHECK: movq [[INCHI]], %rcx
 ; CHECK: cmovneq %rdx, %rcx
@@ -151,16 +144,9 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
 ; CHECK-DAG: movq 8(%rdi), %rdx
 
 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setae [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setge [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setge
 ; CHECK: cmovneq %rax, %rbx
 ; CHECK: movq [[INCHI]], %rcx
 ; CHECK: cmovneq %rdx, %rcx
@@ -183,16 +169,9 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
 ; CHECK-DAG: movq 8(%rdi), %rdx
 
 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setbe [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setbe [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: seta
 ; CHECK: cmovneq %rax, %rbx
 ; CHECK: movq [[INCHI]], %rcx
 ; CHECK: cmovneq %rdx, %rcx
@@ -215,16 +194,9 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
 ; CHECK-DAG: movq 8(%rdi), %rdx
 
 ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rax, %rsi
-; CHECK: setb [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: seta [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setb
 ; CHECK: cmovneq %rax, %rbx
 ; CHECK: movq [[INCHI]], %rcx
 ; CHECK: cmovneq %rdx, %rcx
diff --git a/llvm/test/CodeGen/X86/avx512-cmp.ll b/llvm/test/CodeGen/X86/avx512-cmp.ll
index f6ea29123f1..6e0d18558c5 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -1,5 +1,4 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix AVX512-32
 
 ; CHECK-LABEL: test1
 ; CHECK: vucomisd {{.*}}encoding: [0x62
@@ -100,27 +99,3 @@ A:
 B:
   ret i32 7
 }
-
-; AVX512-32-LABEL: test10
-; AVX512-32: movl 4(%esp), %ecx
-; AVX512-32: cmpl $9, (%ecx)
-; AVX512-32: seta %al
-; AVX512-32: cmpl $0, 4(%ecx)
-; AVX512-32: setg %cl
-; AVX512-32: je
-; AVX512-32: movb %cl, %al
-; AVX512-32: testb $1, %al
-
-define void @test10(i64* %i.addr) {
-
-  %x = load i64, i64* %i.addr, align 8
-  %cmp = icmp slt i64 %x, 10
-  br i1 %cmp, label %true, label %false
-
-true:
-  ret void
-
-false:
-  ret void
-}
-
diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
new file mode 100644
index 00000000000..c45a0541e6a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
@@ -0,0 +1,130 @@
+; RUN: llc -mtriple=i686-linux-gnu %s -o - | FileCheck %s
+
+
+define i32 @branch_eq(i64 %a, i64 %b) {
+entry:
+  %cmp = icmp eq i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; CHECK-LABEL: branch_eq:
+; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]]
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: xorl 16(%esp), [[LHSHi]]
+; CHECK: xorl 12(%esp), [[LHSLo]]
+; CHECK: orl [[LHSHi]], [[LHSLo]]
+; CHECK: jne [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @branch_slt(i64 %a, i64 %b) {
+entry:
+  %cmp = icmp slt i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; CHECK-LABEL: branch_slt:
+; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]]
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: cmpl 12(%esp), [[LHSLo]]
+; CHECK: sbbl 16(%esp), [[LHSHi]]
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @branch_ule(i64 %a, i64 %b) {
+entry:
+  %cmp = icmp ule i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; CHECK-LABEL: branch_ule:
+; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]]
+; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]]
+; CHECK: cmpl 4(%esp), [[RHSLo]]
+; CHECK: sbbl 8(%esp), [[RHSHi]]
+; CHECK: jb [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @set_gt(i64 %a, i64 %b) {
+entry:
+  %cmp = icmp sgt i64 %a, %b
+  %res = select i1 %cmp, i32 1, i32 0
+  ret i32 %res
+
+; CHECK-LABEL: set_gt:
+; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]]
+; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]]
+; CHECK: cmpl 4(%esp), [[RHSLo]]
+; CHECK: sbbl 8(%esp), [[RHSHi]]
+; CHECK: setl %al
+; CHECK: retl
+}
+
+define i32 @test_wide(i128 %a, i128 %b) {
+entry:
+  %cmp = icmp slt i128 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; CHECK-LABEL: test_wide:
+; CHECK: cmpl 24(%esp)
+; CHECK: sbbl 28(%esp)
+; CHECK: sbbl 32(%esp)
+; CHECK: sbbl 36(%esp)
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @test_carry_false(i64 %a, i64 %b) {
+entry:
+  %x = and i64 %a, -4294967296 ;0xffffffff00000000
+  %y = and i64 %b, -4294967296
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; The comparison of the low bits will be folded to a CARRY_FALSE node. Make
+; sure the code can handle that.
+; CHECK-LABEL: carry_false:
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: cmpl 16(%esp), [[LHSHi]]
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
diff --git a/llvm/test/CodeGen/X86/win32-pic-jumptable.ll b/llvm/test/CodeGen/X86/win32-pic-jumptable.ll
index cabd36ae395..1a90b6238f2 100644
--- a/llvm/test/CodeGen/X86/win32-pic-jumptable.ll
+++ b/llvm/test/CodeGen/X86/win32-pic-jumptable.ll
@@ -7,10 +7,10 @@
 ; CHECK-NEXT:   jmpl *%eax
 
 ; CHECK: LJTI0_0:
+; CHECK-NEXT:   .long LBB0_2-L0$pb
+; CHECK-NEXT:   .long LBB0_3-L0$pb
 ; CHECK-NEXT:   .long LBB0_4-L0$pb
 ; CHECK-NEXT:   .long LBB0_5-L0$pb
-; CHECK-NEXT:   .long LBB0_6-L0$pb
-; CHECK-NEXT:   .long LBB0_7-L0$pb
 
 
 target triple = "i686--windows-itanium"