summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorHans Wennborg <hans@hanshq.net>2015-11-19 16:35:08 +0000
committerHans Wennborg <hans@hanshq.net>2015-11-19 16:35:08 +0000
commitdcc2500452746939988c613e2b6d00513dc2ab3e (patch)
treee481c9d219e1b7274e06ecf50e083fb93a287673 /llvm/test
parent768579c409cde8517e70867df5783402e8da8d08 (diff)
downloadbcm5719-llvm-dcc2500452746939988c613e2b6d00513dc2ab3e.tar.gz
bcm5719-llvm-dcc2500452746939988c613e2b6d00513dc2ab3e.zip
X86: More efficient legalization of wide integer compares
In particular, this makes the code for 64-bit compares on 32-bit targets much more efficient. Example: define i32 @test_slt(i64 %a, i64 %b) { entry: %cmp = icmp slt i64 %a, %b br i1 %cmp, label %bb1, label %bb2 bb1: ret i32 1 bb2: ret i32 2 } Before this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax setae %al cmpl 16(%esp), %ecx setge %cl je .LBB2_2 movb %cl, %al .LBB2_2: testb %al, %al jne .LBB2_4 movl $1, %eax retl .LBB2_4: movl $2, %eax retl After this patch: test_slt: movl 4(%esp), %eax movl 8(%esp), %ecx cmpl 12(%esp), %eax sbbl 16(%esp), %ecx jge .LBB1_2 movl $1, %eax retl .LBB1_2: movl $2, %eax retl Differential Revision: http://reviews.llvm.org/D14496 llvm-svn: 253572
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll3
-rw-r--r--llvm/test/CodeGen/X86/atomic-minmax-i6432.ll8
-rw-r--r--llvm/test/CodeGen/X86/atomic128.ll52
-rw-r--r--llvm/test/CodeGen/X86/avx512-cmp.ll25
-rw-r--r--llvm/test/CodeGen/X86/wide-integer-cmp.ll130
-rw-r--r--llvm/test/CodeGen/X86/win32-pic-jumptable.ll4
6 files changed, 149 insertions, 73 deletions
diff --git a/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
index a19aa52f302..816577be15e 100644
--- a/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
+++ b/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
@@ -26,6 +26,5 @@ if.end: ; preds = %if.then, %entry
ret void
; CHECK-LABEL: fn1:
-; CHECK: shrq $32, [[REG:%.*]]
-; CHECK: sete
+; CHECK: jb
}
diff --git a/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll b/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll
index 4989bc14ef8..d5d3fa6db5e 100644
--- a/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll
+++ b/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll
@@ -8,7 +8,7 @@ define void @atomic_maxmin_i6432() {
%1 = atomicrmw max i64* @sc64, i64 5 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: seta
+; LINUX: sbbl
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock cmpxchg8b
@@ -16,7 +16,7 @@ define void @atomic_maxmin_i6432() {
%2 = atomicrmw min i64* @sc64, i64 6 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: setb
+; LINUX: sbbl
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock cmpxchg8b
@@ -24,7 +24,7 @@ define void @atomic_maxmin_i6432() {
%3 = atomicrmw umax i64* @sc64, i64 7 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: seta
+; LINUX: sbbl
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock cmpxchg8b
@@ -32,7 +32,7 @@ define void @atomic_maxmin_i6432() {
%4 = atomicrmw umin i64* @sc64, i64 8 acquire
; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]]
; LINUX: cmpl
-; LINUX: setb
+; LINUX: sbbl
; LINUX: cmovne
; LINUX: cmovne
; LINUX: lock cmpxchg8b
diff --git a/llvm/test/CodeGen/X86/atomic128.ll b/llvm/test/CodeGen/X86/atomic128.ll
index dea7d482f98..c41269b0b60 100644
--- a/llvm/test/CodeGen/X86/atomic128.ll
+++ b/llvm/test/CodeGen/X86/atomic128.ll
@@ -119,16 +119,9 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setbe [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setle [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setg
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
@@ -151,16 +144,9 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setae [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setge [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setge
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
@@ -183,16 +169,9 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rsi, %rax
-; CHECK: setbe [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: setbe [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: seta
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
@@ -215,16 +194,9 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
; CHECK-DAG: movq 8(%rdi), %rdx
; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: cmpq %rax, %rsi
-; CHECK: setb [[CMP:%[a-z0-9]+]]
-; CHECK: cmpq [[INCHI]], %rdx
-; CHECK: seta [[HICMP:%[a-z0-9]+]]
-; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]]
-
-; CHECK: movb [[HICMP]], [[CMP]]
-; CHECK: [[USE_LO]]:
-; CHECK: testb [[CMP]], [[CMP]]
-; CHECK: movq %rsi, %rbx
+; CHECK: cmpq
+; CHECK: sbbq
+; CHECK: setb
; CHECK: cmovneq %rax, %rbx
; CHECK: movq [[INCHI]], %rcx
; CHECK: cmovneq %rdx, %rcx
diff --git a/llvm/test/CodeGen/X86/avx512-cmp.ll b/llvm/test/CodeGen/X86/avx512-cmp.ll
index f6ea29123f1..6e0d18558c5 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp.ll
@@ -1,5 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix AVX512-32
; CHECK-LABEL: test1
; CHECK: vucomisd {{.*}}encoding: [0x62
@@ -100,27 +99,3 @@ A:
B:
ret i32 7
}
-
-; AVX512-32-LABEL: test10
-; AVX512-32: movl 4(%esp), %ecx
-; AVX512-32: cmpl $9, (%ecx)
-; AVX512-32: seta %al
-; AVX512-32: cmpl $0, 4(%ecx)
-; AVX512-32: setg %cl
-; AVX512-32: je
-; AVX512-32: movb %cl, %al
-; AVX512-32: testb $1, %al
-
-define void @test10(i64* %i.addr) {
-
- %x = load i64, i64* %i.addr, align 8
- %cmp = icmp slt i64 %x, 10
- br i1 %cmp, label %true, label %false
-
-true:
- ret void
-
-false:
- ret void
-}
-
diff --git a/llvm/test/CodeGen/X86/wide-integer-cmp.ll b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
new file mode 100644
index 00000000000..c45a0541e6a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/wide-integer-cmp.ll
@@ -0,0 +1,130 @@
+; RUN: llc -mtriple=i686-linux-gnu %s -o - | FileCheck %s
+
+
+define i32 @branch_eq(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp eq i64 %a, %b
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; CHECK-LABEL: branch_eq:
+; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]]
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: xorl 16(%esp), [[LHSHi]]
+; CHECK: xorl 12(%esp), [[LHSLo]]
+; CHECK: orl [[LHSHi]], [[LHSLo]]
+; CHECK: jne [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @branch_slt(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp slt i64 %a, %b
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; CHECK-LABEL: branch_slt:
+; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]]
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: cmpl 12(%esp), [[LHSLo]]
+; CHECK: sbbl 16(%esp), [[LHSHi]]
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @branch_ule(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp ule i64 %a, %b
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; CHECK-LABEL: branch_ule:
+; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]]
+; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]]
+; CHECK: cmpl 4(%esp), [[RHSLo]]
+; CHECK: sbbl 8(%esp), [[RHSHi]]
+; CHECK: jb [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @set_gt(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp sgt i64 %a, %b
+ %res = select i1 %cmp, i32 1, i32 0
+ ret i32 %res
+
+; CHECK-LABEL: set_gt:
+; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]]
+; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]]
+; CHECK: cmpl 4(%esp), [[RHSLo]]
+; CHECK: sbbl 8(%esp), [[RHSHi]]
+; CHECK: setl %al
+; CHECK: retl
+}
+
+define i32 @test_wide(i128 %a, i128 %b) {
+entry:
+ %cmp = icmp slt i128 %a, %b
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; CHECK-LABEL: test_wide:
+; CHECK: cmpl 24(%esp)
+; CHECK: sbbl 28(%esp)
+; CHECK: sbbl 32(%esp)
+; CHECK: sbbl 36(%esp)
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @test_carry_false(i64 %a, i64 %b) {
+entry:
+ %x = and i64 %a, -4294967296 ;0xffffffff00000000
+ %y = and i64 %b, -4294967296
+ %cmp = icmp slt i64 %x, %y
+ br i1 %cmp, label %bb1, label %bb2
+bb1:
+ ret i32 1
+bb2:
+ ret i32 2
+
+; The comparison of the low bits will be folded to a CARRY_FALSE node. Make
+; sure the code can handle that.
+; CHECK-LABEL: test_carry_false:
+; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]]
+; CHECK: cmpl 16(%esp), [[LHSHi]]
+; CHECK: jge [[FALSE:.LBB[0-9_]+]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
diff --git a/llvm/test/CodeGen/X86/win32-pic-jumptable.ll b/llvm/test/CodeGen/X86/win32-pic-jumptable.ll
index cabd36ae395..1a90b6238f2 100644
--- a/llvm/test/CodeGen/X86/win32-pic-jumptable.ll
+++ b/llvm/test/CodeGen/X86/win32-pic-jumptable.ll
@@ -7,10 +7,10 @@
; CHECK-NEXT: jmpl *%eax
; CHECK: LJTI0_0:
+; CHECK-NEXT: .long LBB0_2-L0$pb
+; CHECK-NEXT: .long LBB0_3-L0$pb
; CHECK-NEXT: .long LBB0_4-L0$pb
; CHECK-NEXT: .long LBB0_5-L0$pb
-; CHECK-NEXT: .long LBB0_6-L0$pb
-; CHECK-NEXT: .long LBB0_7-L0$pb
target triple = "i686--windows-itanium"
OpenPOWER on IntegriCloud