author    Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>  2014-05-06 17:09:03 +0000
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>  2014-05-06 17:09:03 +0000
commit    c14ccc9184fc1bf02212fb85fdce36e9be8558d5 (patch)
tree      04df0d8eeefdac20a3303a8df8c3e4943b575f99 /llvm/test/CodeGen
parent    29020cc571d564976555ec0eef26b53fc0590a6b (diff)
[X86] Improve the lowering of BITCAST dag nodes from type f64 to type v2i32 (and vice versa).
Before this patch, the backend always emitted a stack store+load sequence to bitconvert the input operand of an ISD::BITCAST dag node from type f64 to type i64 whenever that node performed a bitconvert from type MVT::f64 to type MVT::v2i32. The resulting i64 node was then used to build a v2i32 vector.

With this patch, the backend instead produces a cheaper SCALAR_TO_VECTOR from MVT::f64 to MVT::v2f64. That SCALAR_TO_VECTOR is then followed by a "free" bitcast to type MVT::v4i32. The elements of the resulting v4i32 are then extracted to build a v2i32 vector (which is illegal and is therefore promoted to MVT::v2i64). This is in general cheaper than emitting a stack store+load sequence to bitconvert the operand from type f64 to type i64.

llvm-svn: 208107
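For illustration, here is a minimal standalone reproducer of the affected pattern (a sketch mirroring @test1 from the new test file below; the function name is illustrative, and the target assumptions match the test's RUN line: x86-64, -mcpu=core2, +sse2):

define double @bitcast_add(double %A) {
  %v = bitcast double %A to <2 x i32>    ; previously lowered via a stack store+load
  %r = add <2 x i32> %v, <i32 3, i32 5>  ; v2i32 is illegal, so it is promoted to v2i64
  %d = bitcast <2 x i32> %r to double
  ret double %d
}

With this patch, a function like this compiles to a register-only pshufd+paddq+pshufd sequence with no movsd stack round trip, which is exactly what the CHECK lines of @test1 verify.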
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll  80
-rw-r--r--  llvm/test/CodeGen/X86/ret-mmx.ll               5
2 files changed, 83 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll b/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll
new file mode 100644
index 00000000000..1c0de630ef8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
+
+
+define double @test1(double %A) {
+ %1 = bitcast double %A to <2 x i32>
+ %add = add <2 x i32> %1, <i32 3, i32 5>
+ %2 = bitcast <2 x i32> %add to double
+ ret double %2
+}
+; FIXME: Ideally we should be able to fold the entire body of @test1 into a
+; single paddd instruction. At the moment we produce the sequence
+; pshufd+paddq+pshufd.
+
+; CHECK-LABEL: test1
+; CHECK-NOT: movsd
+; CHECK: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define double @test2(double %A, double %B) {
+ %1 = bitcast double %A to <2 x i32>
+ %2 = bitcast double %B to <2 x i32>
+ %add = add <2 x i32> %1, %2
+ %3 = bitcast <2 x i32> %add to double
+ ret double %3
+}
+; FIXME: Ideally we should be able to fold the entire body of @test2 into a
+; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the
+; sequence pshufd+pshufd+paddq+pshufd.
+
+; CHECK-LABEL: test2
+; CHECK-NOT: movsd
+; CHECK: pshufd
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define i64 @test3(i64 %A) {
+ %1 = bitcast i64 %A to <2 x float>
+ %add = fadd <2 x float> %1, <float 3.0, float 5.0>
+ %2 = bitcast <2 x float> %add to i64
+ ret i64 %2
+}
+; CHECK-LABEL: test3
+; CHECK-NOT: pshufd
+; CHECK: addps
+; CHECK-NOT: pshufd
+; CHECK: ret
+
+
+define i64 @test4(i64 %A) {
+ %1 = bitcast i64 %A to <2 x i32>
+ %add = add <2 x i32> %1, <i32 3, i32 5>
+ %2 = bitcast <2 x i32> %add to i64
+ ret i64 %2
+}
+; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
+; Ideally, we should fold that sequence into a single paddd.
+
+; CHECK-LABEL: test4
+; CHECK: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK: ret
+
+
+define double @test5(double %A) {
+ %1 = bitcast double %A to <2 x float>
+ %add = fadd <2 x float> %1, <float 3.0, float 5.0>
+ %2 = bitcast <2 x float> %add to double
+ ret double %2
+}
+; CHECK-LABEL: test5
+; CHECK: addps
+; CHECK-NEXT: ret
+
diff --git a/llvm/test/CodeGen/X86/ret-mmx.ll b/llvm/test/CodeGen/X86/ret-mmx.ll
index 091fd539849..7adf307ce0f 100644
--- a/llvm/test/CodeGen/X86/ret-mmx.ll
+++ b/llvm/test/CodeGen/X86/ret-mmx.ll
@@ -33,7 +33,8 @@ define <2 x i32> @t3() nounwind {
define double @t4() nounwind {
ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
; CHECK-LABEL: t4:
-; CHECK: movl $1
-; CHECK: movd {{.*}}, %xmm0
+; CHECK-NOT: movl $1
+; CHECK-NOT: pshufd
+; CHECK: movsd {{.*}}, %xmm0
}
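
For reference, a hedged reading of the updated @t4 checks (the movsd operand is not pinned down by the check line, so the constant-pool source noted below is an assumption):

; @t4 returns a constant <2 x i32> bitcast to double:
define double @t4() nounwind {
  ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
}
; old codegen (removed CHECK lines): movl $1, ...        ; build the constant in a GPR
;                                    movd {{.*}}, %xmm0  ; then transfer it to an XMM register
; new codegen (added CHECK lines):   movsd {{.*}}, %xmm0 ; materialize the f64 directly,
;                                                        ; assumed to be a constant-pool load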