| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-05-06 17:09:03 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2014-05-06 17:09:03 +0000 |
| commit | c14ccc9184fc1bf02212fb85fdce36e9be8558d5 | |
| tree | 04df0d8eeefdac20a3303a8df8c3e4943b575f99 /llvm/test/CodeGen | |
| parent | 29020cc571d564976555ec0eef26b53fc0590a6b | |
[X86] Improve the lowering of BITCAST dag nodes from type f64 to type v2i32 (and vice versa).
Before this patch, the backend always emitted a stack store+load sequence to
bitconvert the input operand from f64 to i64 whenever an ISD::BITCAST dag node
performed a bitconvert from type MVT::f64 to type MVT::v2i32. The resulting
i64 node was then used to build a v2i32 vector.
With this patch, the backend now produces a cheaper SCALAR_TO_VECTOR from
MVT::f64 to MVT::v2f64. That SCALAR_TO_VECTOR is then followed by a "free"
bitcast to type MVT::v4i32. The elements of the resulting
v4i32 are then extracted to build a v2i32 vector (which is illegal and
therefore promoted to MVT::v2i64).
This is in general cheaper than emitting a stack store+load sequence
to bitconvert the operand from type f64 to type i64.
llvm-svn: 208107
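For illustration, here is a minimal IR example of the pattern this change targets (it mirrors @test1 in the new test file below; the function name is purely illustrative). Before this patch such a function was lowered through a stack store+load of the f64 operand; with the patch it instead lowers to a pshufd+paddq+pshufd sequence on this target, which is still not the ideal single paddd, as the FIXME comments in the test record.

```llvm
; Round-trip an f64 through <2 x i32>, with integer arithmetic in between.
; <2 x i32> is illegal on x86-64 and is promoted to <2 x i64>, which is why
; the add currently lowers to a paddq between two pshufds.
define double @f64_roundtrip_v2i32(double %A) {
  %v   = bitcast double %A to <2 x i32>
  %add = add <2 x i32> %v, <i32 3, i32 5>
  %res = bitcast <2 x i32> %add to double
  ret double %res
}
```

Running this through `llc -march=x86-64 -mcpu=core2 -mattr=+sse2`, as the RUN line of the new test does, shows the difference in the emitted code.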
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll | 80 |
| -rw-r--r-- | llvm/test/CodeGen/X86/ret-mmx.ll | 5 |
2 files changed, 83 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll b/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll
new file mode 100644
index 00000000000..1c0de630ef8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/lower-bitcast-v2i32.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
+
+
+define double @test1(double %A) {
+  %1 = bitcast double %A to <2 x i32>
+  %add = add <2 x i32> %1, <i32 3, i32 5>
+  %2 = bitcast <2 x i32> %add to double
+  ret double %2
+}
+; FIXME: Ideally we should be able to fold the entire body of @test1 into a
+; single paddd instruction. At the moment we produce the sequence
+; pshufd+paddq+pshufd.
+
+; CHECK-LABEL: test1
+; CHECK-NOT: movsd
+; CHECK: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define double @test2(double %A, double %B) {
+  %1 = bitcast double %A to <2 x i32>
+  %2 = bitcast double %B to <2 x i32>
+  %add = add <2 x i32> %1, %2
+  %3 = bitcast <2 x i32> %add to double
+  ret double %3
+}
+; FIXME: Ideally we should be able to fold the entire body of @test2 into a
+; single 'paddd %xmm1, %xmm0' instruction. At the moment we produce the
+; sequence pshufd+pshufd+paddq+pshufd.
+
+; CHECK-LABEL: test2
+; CHECK-NOT: movsd
+; CHECK: pshufd
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK-NEXT: ret
+
+
+define i64 @test3(i64 %A) {
+  %1 = bitcast i64 %A to <2 x float>
+  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
+  %2 = bitcast <2 x float> %add to i64
+  ret i64 %2
+}
+; CHECK-LABEL: test3
+; CHECK-NOT: pshufd
+; CHECK: addps
+; CHECK-NOT: pshufd
+; CHECK: ret
+
+
+define i64 @test4(i64 %A) {
+  %1 = bitcast i64 %A to <2 x i32>
+  %add = add <2 x i32> %1, <i32 3, i32 5>
+  %2 = bitcast <2 x i32> %add to i64
+  ret i64 %2
+}
+; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
+; Ideally, we should fold that sequence into a single paddd.
+
+; CHECK-LABEL: test4
+; CHECK: pshufd
+; CHECK-NEXT: paddq
+; CHECK-NEXT: pshufd
+; CHECK: ret
+
+
+define double @test5(double %A) {
+  %1 = bitcast double %A to <2 x float>
+  %add = fadd <2 x float> %1, <float 3.0, float 5.0>
+  %2 = bitcast <2 x float> %add to double
+  ret double %2
+}
+; CHECK-LABEL: test5
+; CHECK: addps
+; CHECK-NEXT: ret
+
diff --git a/llvm/test/CodeGen/X86/ret-mmx.ll b/llvm/test/CodeGen/X86/ret-mmx.ll
index 091fd539849..7adf307ce0f 100644
--- a/llvm/test/CodeGen/X86/ret-mmx.ll
+++ b/llvm/test/CodeGen/X86/ret-mmx.ll
@@ -33,7 +33,8 @@ define <2 x i32> @t3() nounwind {
 define double @t4() nounwind {
 	ret double bitcast (<2 x i32> <i32 1, i32 0> to double)
 ; CHECK-LABEL: t4:
-; CHECK: movl $1
-; CHECK: movd {{.*}}, %xmm0
+; CHECK-NOT: movl $1
+; CHECK-NOT: pshufd
+; CHECK: movsd {{.*}}, %xmm0
 }

