summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
diff options
context:
space:
mode:
authorJim Grosbach <grosbach@apple.com>2014-07-18 00:40:56 +0000
committerJim Grosbach <grosbach@apple.com>2014-07-18 00:40:56 +0000
commitb6535c32f549a2a981578c29a93995213c798b1e (patch)
tree060e009e90f613cb6813112bb7a570dcda7bfa2f /llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
parentf7502c48840f9919018243dc0e48d2681f55d65e (diff)
downloadbcm5719-llvm-b6535c32f549a2a981578c29a93995213c798b1e.tar.gz
bcm5719-llvm-b6535c32f549a2a981578c29a93995213c798b1e.zip
X86: Constant fold converting vector setcc results to float.
Since the result of a SETCC for X86 is 0 or -1 in each lane, we can move unary operations, in this case [su]int_to_fp through the mask operation and constant fold the operation away. Generally speaking: UNARYOP(AND(VECTOR_CMP(x,y), constant)) --> AND(VECTOR_CMP(x,y), constant2) where constant2 is UNARYOP(constant). This implements the transform where UNARYOP is [su]int_to_fp. For example, consider the simple function: define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind { %cmp = fcmp oeq <4 x float> %val, %test %ext = zext <4 x i1> %cmp to <4 x i32> %result = sitofp <4 x i32> %ext to <4 x float> ret <4 x float> %result } Before this change, the SSE code is generated as: LCPI0_0: .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .long 1 ## 0x1 .section __TEXT,__text,regular,pure_instructions .globl _foo .align 4, 0x90 _foo: ## @foo cmpeqps %xmm1, %xmm0 andps LCPI0_0(%rip), %xmm0 cvtdq2ps %xmm0, %xmm0 retq After, the code is improved to: LCPI0_0: .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .long 1065353216 ## float 1.000000e+00 .section __TEXT,__text,regular,pure_instructions .globl _foo .align 4, 0x90 _foo: ## @foo cmpeqps %xmm1, %xmm0 andps LCPI0_0(%rip), %xmm0 retq The cvtdq2ps has been constant folded away and the floating point 1.0f vector lanes are materialized directly via the ModRM operand of andps. llvm-svn: 213342
Diffstat (limited to 'llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll')
-rw-r--r--llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll18
1 files changed, 18 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
new file mode 100644
index 00000000000..d691685f1dd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
+; CHECK-LABEL: LCPI0_0
+; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216 ## float 1.000000e+00
+; CHECK-LABEL: foo:
+; CHECK: cmpeqps %xmm1, %xmm0
+; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0
+; CHECK-NEXT: retq
+
+ %cmp = fcmp oeq <4 x float> %val, %test
+ %ext = zext <4 x i1> %cmp to <4 x i32>
+ %result = sitofp <4 x i32> %ext to <4 x float>
+ ret <4 x float> %result
+}
OpenPOWER on IntegriCloud