author    Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>  2014-05-06 17:09:03 +0000
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>  2014-05-06 17:09:03 +0000
commit    c14ccc9184fc1bf02212fb85fdce36e9be8558d5 (patch)
tree      04df0d8eeefdac20a3303a8df8c3e4943b575f99 /llvm/test/CodeGen
parent    29020cc571d564976555ec0eef26b53fc0590a6b (diff)
[X86] Improve the lowering of BITCAST dag nodes from type f64 to type v2i32 (and vice versa).
Before this patch, the backend always emitted a stack store+load sequence to bitconvert the input operand of an ISD::BITCAST dag node from type f64 to type i64 whenever that node performed a bitconvert from type MVT::f64 to type MVT::v2i32. The resulting i64 node was then used to build a v2i32 vector.

With this patch, the backend instead produces a cheaper SCALAR_TO_VECTOR from MVT::f64 to MVT::v2f64. That SCALAR_TO_VECTOR is then followed by a "free" bitcast to type MVT::v4i32. The elements of the resulting v4i32 are then extracted to build a v2i32 vector (which is illegal and is therefore promoted to MVT::v2i64). This is in general cheaper than emitting a stack store+load sequence to bitconvert the operand from type f64 to type i64.

llvm-svn: 208107
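For illustration, here is a minimal standalone reproducer of the affected pattern (a sketch mirroring @test1 from the new test file below; the function name is illustrative, and the target assumptions match the test's RUN line: x86-64, -mcpu=core2, +sse2):

define double @bitcast_add(double %A) {
  %v = bitcast double %A to <2 x i32>    ; previously lowered via a stack store+load
  %r = add <2 x i32> %v, <i32 3, i32 5>  ; v2i32 is illegal, so it is promoted to v2i64
  %d = bitcast <2 x i32> %r to double
  ret double %d
}

With this patch, a function like this compiles to a register-only pshufd+paddq+pshufd sequence with no movsd stack round trip, which is exactly what the CHECK lines of @test1 verify.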
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll  80
-rw-r--r--  llvm/test/CodeGen/X86/ret-mmx.ll               5
2 files changed, 83 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll b/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll
new file mode 100644
index 00000000000..1c0de630ef8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
+
+
+define double @test1(double %A) {
+ %1 = bitcast double %A to <2 x i32>
+ %add = add <2 x i32> %1, <i32 3, i32 5>
+ %2 = bitcast <2 x i32> %add to double
+ ret double %2
+}
+; FIXME: Ideally we should be able to fold the entire body of @test1 into a
+; single paddd instruction. At the moment we produce the sequence
+; pshufd+paddq+pshufd.
+
+; CHECK-LABEL: test1
+; CHECK-NOT: movsd
+; CHECK: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define double @test2(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %add = add <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %add to double
+ ret double %3
+}
+; FIXME: Ideally we should be able to fold the entire body of @test2 into a
+; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the
+; sequence pshufd+pshufd+paddq+pshufd.
+
+; CHECK-LABEL: test2
+; CHECK-NOT: movsd
+; CHECK: pshufd
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define i64 @test3(i64 %A) {
+ %1 = bitcast i64 %A to <2 x float>
+ %add = fadd <2 x float> %1, <float 3.0, float 5.0>
+ %2 = bitcast <2 x float> %add to i64
+ ret i64 %2
+}
+; CHECK-LABEL: test3
+; CHECK-NOT: pshufd
+; CHECK: addps
+; CHECK-NOT: pshufd
+; CHECK: ret
+
+
+define i64 @test4(i64 %A) {
+ %1 = bitcast i64 %A to <2 x i32>
+ %add = add <2 x i32> %1, <i32 3, i32 5>
+ %2 = bitcast <2 x i32> %add to i64
+ ret i64 %2
+}
+; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
+; Ideally, we should fold that sequence into a single paddd.
+
+; CHECK-LABEL: test4
+; CHECK: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK: ret
+
+
+define double @test5(double %A) {
+ %1 = bitcast double %A to <2 x float>
+ %add = fadd <2 x float> %1, <float 3.0, float 5.0>
+ %2 = bitcast <2 x float> %add to double
+ ret double %2
+}
+; CHECK-LABEL: test5
+; CHECK: addps
+; CHECK-NEXT: ret
+
diff --git a/llvm/test/CodeGen/X86/ret-mmx.ll b/llvm/test/CodeGen/X86/ret-mmx.ll
index 091fd539849..7adf307ce0f 100644
--- a/llvm/test/CodeGen/X86/ret-mmx.ll
+++ b/llvm/test/CodeGen/X86/ret-mmx.ll
@@ -33,7 +33,8 @@ define <2 x i32> @t3() nounwind {
define double @t4() nounwind {
ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
; CHECK-LABEL: t4:
-; CHECK: movl $1
-; CHECK: movd {{.*}}, %xmm0
+; CHECK-NOT: movl $1
+; CHECK-NOT: pshufd
+; CHECK: movsd {{.*}}, %xmm0
}
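
For reference, a hedged reading of the updated @t4 checks (the movsd operand is not pinned down by the check line, so the constant-pool source noted below is an assumption):

; @t4 returns a constant <2 x i32> bitcast to double:
define double @t4() nounwind {
  ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
}
; old codegen (removed CHECK lines): movl $1, ...        ; build the constant in a GPR
;                                    movd {{.*}}, %xmm0  ; then transfer it to an XMM register
; new codegen (added CHECK lines):   movsd {{.*}}, %xmm0 ; materialize the f64 directly,
;                                                        ; assumed to be a constant-pool load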