summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-10-18 07:42:15 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-10-18 07:42:15 +0000
commit4ddc92b6cdcb1a15b8973fad366cd70a06446439 (patch)
tree99a511ec1c2c48daeb7c1d068c79f38ea52e9656 /llvm/test/CodeGen/X86
parent391fb8662a75b7cc45cad8926888ff6c5685b4ab (diff)
downloadbcm5719-llvm-4ddc92b6cdcb1a15b8973fad366cd70a06446439.tar.gz
bcm5719-llvm-4ddc92b6cdcb1a15b8973fad366cd70a06446439.zip
[X86][SSE] Add lowering to cvttpd2dq/cvttps2dq for fptosi v2f64/2f32 to 2i32
As discussed on PR28461, we currently miss the chance to lower "fptosi <2 x double> %arg to <2 x i32>" to cvttpd2dq due to its use of illegal types. This patch adds support for fptosi to 2i32 from both 2f64 and 2f32. It also recognises that cvttpd2dq zeroes the upper 64 bits of the xmm result (similar to D23797); we still don't do this for the cvttpd2dq/cvttps2dq intrinsics, but that can be done in a future patch. Differential Revision: https://reviews.llvm.org/D23808 llvm-svn: 284459
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--llvm/test/CodeGen/X86/vec_fp_to_int.ll130
1 files changed, 25 insertions, 105 deletions
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
index 0cd7213c677..fcf32a8bd77 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -56,46 +56,18 @@ define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_4i32:
; SSE: # BB#0:
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,2]
-; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_4i32:
; AVX: # BB#0:
-; AVX-NEXT: vcvttsd2si %xmm0, %rax
-; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-NEXT: vcvttsd2si %xmm0, %rax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512F-LABEL: fptosi_2f64_to_4i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm1
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; AVX512F-NEXT: retq
-;
-; AVX512DQ-LABEL: fptosi_2f64_to_4i32:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vcvttpd2qq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: fptosi_2f64_to_4i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i32>
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %ext
@@ -104,39 +76,21 @@ define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) {
define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i32:
; SSE: # BB#0:
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i32:
; AVX: # BB#0:
-; AVX-NEXT: vcvttsd2si %xmm0, %rax
-; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-NEXT: vcvttsd2si %xmm0, %rax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
;
-; AVX512F-LABEL: fptosi_2f64_to_2i32:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm1
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: vcvttsd2si %xmm0, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
-; AVX512F-NEXT: retq
-;
-; AVX512DQ-LABEL: fptosi_2f64_to_2i32:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vcvttpd2qq %xmm0, %xmm0
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: fptosi_2f64_to_2i32:
+; AVX512: # BB#0:
+; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i32>
ret <2 x i32> %cvt
}
@@ -144,17 +98,8 @@ define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) {
define <4 x i32> @fptosi_4f64_to_2i32(<2 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_2i32:
; SSE: # BB#0:
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm1
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm1
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
@@ -259,20 +204,8 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) {
; SSE-LABEL: fptosi_4f64_to_4i32:
; SSE: # BB#0:
-; SSE-NEXT: cvttsd2si %xmm1, %rax
-; SSE-NEXT: movd %rax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
-; SSE-NEXT: cvttsd2si %xmm1, %rax
-; SSE-NEXT: movd %rax, %xmm1
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; SSE-NEXT: cvttpd2dq %xmm1, %xmm1
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
@@ -713,33 +646,20 @@ define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i32:
; SSE: # BB#0:
-; SSE-NEXT: cvttss2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %rax
-; SSE-NEXT: movd %rax, %xmm0
-; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f32_to_2i32:
; AVX: # BB#0:
-; AVX-NEXT: vcvttss2si %xmm0, %rax
-; AVX-NEXT: vmovq %rax, %xmm1
-; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-NEXT: vcvttss2si %xmm0, %rax
-; AVX-NEXT: vmovq %rax, %xmm0
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX-NEXT: retq
;
; AVX512-LABEL: fptosi_2f32_to_2i32:
; AVX512: # BB#0:
-; AVX512-NEXT: vcvttss2si %xmm0, %rax
-; AVX512-NEXT: vmovq %rax, %xmm1
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX512-NEXT: vcvttss2si %xmm0, %rax
-; AVX512-NEXT: vmovq %rax, %xmm0
-; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX512-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i32>
ret <2 x i32> %cvt
OpenPOWER on IntegriCloud