summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2018-11-16 22:53:00 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-11-16 22:53:00 +0000
commit: ee0333b4a9d5baf3165f09cd6fb4b520fae620e6 (patch)
tree: 7aec9c72e73a4c0b98a5628bf0e66b17519e281f /llvm/test/CodeGen
parent: ac35cd330ac06cce6a158e102fc9265f97476a7b (diff)
download: bcm5719-llvm-ee0333b4a9d5baf3165f09cd6fb4b520fae620e6.tar.gz
download: bcm5719-llvm-ee0333b4a9d5baf3165f09cd6fb4b520fae620e6.zip
[X86] Add custom promotion of narrow fp_to_uint/fp_to_sint operations under -x86-experimental-vector-widening-legalization.
This tries to force the result type to vXi32 followed by a truncate. This can help avoid scalarization that would otherwise occur. There's some annoying examples of an avx512 truncate instruction followed by a packus where we should really be able to just use one truncate. But overall this is still a net improvement. llvm-svn: 347105
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/X86/avx512-cvt-widen.ll42
-rw-r--r--llvm/test/CodeGen/X86/vec_cast2.ll94
-rw-r--r--llvm/test/CodeGen/X86/vec_cast3.ll28
-rw-r--r--llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll334
4 files changed, 97 insertions, 401 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
index a4d9a8272a3..82681ec7d41 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll
@@ -502,33 +502,21 @@ define <8 x i16> @f64to8us(<8 x double> %f) {
}
define <8 x i8> @f64to8uc(<8 x double> %f) {
-; ALL-LABEL: f64to8uc:
-; ALL: # %bb.0:
-; ALL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; ALL-NEXT: vcvttsd2si %xmm1, %eax
-; ALL-NEXT: vcvttsd2si %xmm0, %ecx
-; ALL-NEXT: vmovd %ecx, %xmm1
-; ALL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf128 $1, %ymm0, %xmm2
-; ALL-NEXT: vcvttsd2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; ALL-NEXT: vcvttsd2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm2
-; ALL-NEXT: vcvttsd2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; ALL-NEXT: vcvttsd2si %xmm2, %eax
-; ALL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; ALL-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; ALL-NEXT: vcvttsd2si %xmm0, %eax
-; ALL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; ALL-NEXT: vcvttsd2si %xmm0, %eax
-; ALL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
-; ALL-NEXT: vzeroupper
-; ALL-NEXT: retq
+; NOVL-LABEL: f64to8uc:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
+; NOVL-NEXT: vpmovdw %zmm0, %ymm0
+; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; NOVL-NEXT: vzeroupper
+; NOVL-NEXT: retq
+;
+; VL-LABEL: f64to8uc:
+; VL: # %bb.0:
+; VL-NEXT: vcvttpd2dq %zmm0, %ymm0
+; VL-NEXT: vpmovdw %ymm0, %xmm0
+; VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; VL-NEXT: vzeroupper
+; VL-NEXT: retq
%res = fptoui <8 x double> %f to <8 x i8>
ret <8 x i8> %res
}
diff --git a/llvm/test/CodeGen/X86/vec_cast2.ll b/llvm/test/CodeGen/X86/vec_cast2.ll
index 1bc4b690487..d746bf59121 100644
--- a/llvm/test/CodeGen/X86/vec_cast2.ll
+++ b/llvm/test/CodeGen/X86/vec_cast2.ll
@@ -172,29 +172,10 @@ define <8 x i8> @cvt_v8f32_v8i8(<8 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v8f32_v8i8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; CHECK-WIDE-NEXT: vzeroupper
; CHECK-WIDE-NEXT: retl
%res = fptosi <8 x float> %src to <8 x i8>
@@ -229,17 +210,8 @@ define <4 x i8> @cvt_v4f32_v4i8(<4 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v4f32_v4i8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-WIDE-NEXT: retl
%res = fptosi <4 x float> %src to <4 x i8>
ret <4 x i8> %res
@@ -253,11 +225,8 @@ define <4 x i16> @cvt_v4f32_v4i16(<4 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v4f32_v4i16:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; CHECK-WIDE-NEXT: retl
%res = fptosi <4 x float> %src to <4 x i16>
ret <4 x i16> %res
@@ -274,29 +243,10 @@ define <8 x i8> @cvt_v8f32_v8u8(<8 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v8f32_v8u8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
+; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; CHECK-WIDE-NEXT: vzeroupper
; CHECK-WIDE-NEXT: retl
%res = fptoui <8 x float> %src to <8 x i8>
@@ -331,17 +281,8 @@ define <4 x i8> @cvt_v4f32_v4u8(<4 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v4f32_v4u8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm1
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm2, %eax
-; CHECK-WIDE-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
-; CHECK-WIDE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %eax
-; CHECK-WIDE-NEXT: vpinsrb $3, %eax, %xmm1, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-WIDE-NEXT: retl
%res = fptoui <4 x float> %src to <4 x i8>
ret <4 x i8> %res
@@ -355,11 +296,8 @@ define <4 x i16> @cvt_v4f32_v4u16(<4 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v4f32_v4u16:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; CHECK-WIDE-NEXT: retl
%res = fptoui <4 x float> %src to <4 x i16>
ret <4 x i16> %res
diff --git a/llvm/test/CodeGen/X86/vec_cast3.ll b/llvm/test/CodeGen/X86/vec_cast3.ll
index e8662b8cc34..e4e6aa52ff5 100644
--- a/llvm/test/CodeGen/X86/vec_cast3.ll
+++ b/llvm/test/CodeGen/X86/vec_cast3.ll
@@ -117,11 +117,8 @@ define <2 x i8> @cvt_v2f32_v2i8(<2 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm0
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-WIDE-NEXT: retl
%res = fptosi <2 x float> %src to <2 x i8>
ret <2 x i8> %res
@@ -136,11 +133,8 @@ define <2 x i16> @cvt_v2f32_v2i16(<2 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2i16:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-WIDE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-WIDE-NEXT: retl
%res = fptosi <2 x float> %src to <2 x i16>
ret <2 x i16> %res
@@ -170,11 +164,8 @@ define <2 x i8> @cvt_v2f32_v2u8(<2 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u8:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-WIDE-NEXT: vcvttss2si %xmm1, %eax
-; CHECK-WIDE-NEXT: vcvttss2si %xmm0, %ecx
-; CHECK-WIDE-NEXT: vmovd %ecx, %xmm0
-; CHECK-WIDE-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; CHECK-WIDE-NEXT: retl
%res = fptoui <2 x float> %src to <2 x i8>
ret <2 x i8> %res
@@ -189,11 +180,8 @@ define <2 x i16> @cvt_v2f32_v2u16(<2 x float> %src) {
;
; CHECK-WIDE-LABEL: cvt_v2f32_v2u16:
; CHECK-WIDE: ## %bb.0:
-; CHECK-WIDE-NEXT: ## kill: def $xmm0 killed $xmm0 def $ymm0
-; CHECK-WIDE-NEXT: vcvttps2dq %ymm0, %ymm0
-; CHECK-WIDE-NEXT: vextractf128 $1, %ymm0, %xmm1
-; CHECK-WIDE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; CHECK-WIDE-NEXT: vzeroupper
+; CHECK-WIDE-NEXT: vcvttps2dq %xmm0, %xmm0
+; CHECK-WIDE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-WIDE-NEXT: retl
%res = fptoui <2 x float> %src to <2 x i16>
ret <2 x i16> %res
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll b/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
index f4e0147e3f1..47078daa925 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int-widen.ll
@@ -2310,31 +2310,17 @@ define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind {
define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i8:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: movzbl %al, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %ecx
-; SSE-NEXT: shll $8, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: movd %ecx, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
-; VEX-LABEL: fptosi_2f32_to_2i8:
-; VEX: # %bb.0:
-; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; VEX-NEXT: vcvttss2si %xmm1, %eax
-; VEX-NEXT: vcvttss2si %xmm0, %ecx
-; VEX-NEXT: vmovd %ecx, %xmm0
-; VEX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: fptosi_2f32_to_2i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: fptosi_2f32_to_2i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i8>
ret <2 x i8> %cvt
}
@@ -2342,64 +2328,15 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) {
define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptosi_2f32_to_2i16:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %ecx
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; AVX1-LABEL: fptosi_2f32_to_2i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: fptosi_2f32_to_2i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: fptosi_2f32_to_2i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptosi_2f32_to_2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptosi_2f32_to_2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptosi_2f32_to_2i16:
-; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VLDQ-NEXT: vzeroupper
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptosi_2f32_to_2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
}
@@ -2407,31 +2344,17 @@ define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) {
define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i8:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: movzbl %al, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %ecx
-; SSE-NEXT: shll $8, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: movd %ecx, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
-; VEX-LABEL: fptoui_2f32_to_2i8:
-; VEX: # %bb.0:
-; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; VEX-NEXT: vcvttss2si %xmm1, %eax
-; VEX-NEXT: vcvttss2si %xmm0, %ecx
-; VEX-NEXT: vmovd %ecx, %xmm0
-; VEX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512-LABEL: fptoui_2f32_to_2i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: fptoui_2f32_to_2i8:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i8>
ret <2 x i8> %cvt
}
@@ -2439,64 +2362,15 @@ define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) {
define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i16:
; SSE: # %bb.0:
-; SSE-NEXT: cvttss2si %xmm0, %eax
-; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-NEXT: cvttss2si %xmm0, %ecx
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE-NEXT: cvttps2dq %xmm0, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; AVX1-LABEL: fptoui_2f32_to_2i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: fptoui_2f32_to_2i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: fptoui_2f32_to_2i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512F-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptoui_2f32_to_2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptoui_2f32_to_2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512DQ-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptoui_2f32_to_2i16:
-; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX512VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0
-; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VLDQ-NEXT: vzeroupper
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptoui_2f32_to_2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
}
@@ -2504,22 +2378,16 @@ define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) {
define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i8:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movzbl %al, %eax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %ecx
-; SSE-NEXT: shll $8, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: movd %ecx, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i8:
; AVX: # %bb.0:
-; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vcvttsd2si %xmm1, %eax
-; AVX-NEXT: vcvttsd2si %xmm0, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i8>
ret <2 x i8> %cvt
@@ -2528,55 +2396,15 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) {
define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptosi_2f64_to_2i16:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %ecx
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; VEX-LABEL: fptosi_2f64_to_2i16:
-; VEX: # %bb.0:
-; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; VEX-NEXT: vcvttsd2si %xmm1, %eax
-; VEX-NEXT: vcvttsd2si %xmm0, %ecx
-; VEX-NEXT: vmovd %ecx, %xmm0
-; VEX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512F-LABEL: fptosi_2f64_to_2i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptosi_2f64_to_2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptosi_2f64_to_2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptosi_2f64_to_2i16:
-; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VLDQ-NEXT: vzeroupper
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptosi_2f64_to_2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
}
@@ -2584,22 +2412,16 @@ define <2 x i16> @fptosi_2f64_to_2i16(<2 x double> %a) {
define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i8:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movzbl %al, %eax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %ecx
-; SSE-NEXT: shll $8, %ecx
-; SSE-NEXT: orl %eax, %ecx
-; SSE-NEXT: movd %ecx, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: andpd {{.*}}(%rip), %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptoui_2f64_to_2i8:
; AVX: # %bb.0:
-; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-NEXT: vcvttsd2si %xmm1, %eax
-; AVX-NEXT: vcvttsd2si %xmm0, %ecx
-; AVX-NEXT: vmovd %ecx, %xmm0
-; AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i8>
ret <2 x i8> %cvt
@@ -2608,55 +2430,15 @@ define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) {
define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) {
; SSE-LABEL: fptoui_2f64_to_2i16:
; SSE: # %bb.0:
-; SSE-NEXT: cvttsd2si %xmm0, %eax
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-NEXT: cvttsd2si %xmm0, %ecx
-; SSE-NEXT: movd %eax, %xmm0
-; SSE-NEXT: pinsrw $1, %ecx, %xmm0
+; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; SSE-NEXT: retq
;
-; VEX-LABEL: fptoui_2f64_to_2i16:
-; VEX: # %bb.0:
-; VEX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; VEX-NEXT: vcvttsd2si %xmm1, %eax
-; VEX-NEXT: vcvttsd2si %xmm0, %ecx
-; VEX-NEXT: vmovd %ecx, %xmm0
-; VEX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
-; VEX-NEXT: retq
-;
-; AVX512F-LABEL: fptoui_2f64_to_2i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: fptoui_2f64_to_2i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512DQ-LABEL: fptoui_2f64_to_2i16:
-; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512VLDQ-LABEL: fptoui_2f64_to_2i16:
-; AVX512VLDQ: # %bb.0:
-; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0
-; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512VLDQ-NEXT: vzeroupper
-; AVX512VLDQ-NEXT: retq
+; AVX-LABEL: fptoui_2f64_to_2i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
}
OpenPOWER on IntegriCloud