| author | Craig Topper <craig.topper@intel.com> | 2018-01-01 21:12:18 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-01-01 21:12:18 +0000 |
| commit | c8898b3640a3220cd1d9945d12e050caa583798b (patch) | |
| tree | 5c7e2b3c714b138a662db63072fdba04dd860905 | |
| parent | bb8b79b0a01d88eb25bc28b86ffe7e0b5f531135 (diff) | |
| download | bcm5719-llvm-c8898b3640a3220cd1d9945d12e050caa583798b.tar.gz bcm5719-llvm-c8898b3640a3220cd1d9945d12e050caa583798b.zip | |
[X86] Promote vXi1 fp_to_uint/fp_to_sint to vXi32 to avoid scalarization.
llvm-svn: 321632
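
The affected pattern is a vector fp-to-int conversion whose result element type is i1. One of the cases exercised by the avx512-cvt.ll test in this diff, which previously scalarized into a long chain of vcvttsd2si and mask-register shuffle instructions, is reproduced here for reference (taken verbatim from the test file below):

```llvm
; From llvm/test/CodeGen/X86/avx512-cvt.ll in this diff: the <8 x i1> result of the
; fptoui feeds a select, so it ends up in an AVX-512 mask register.
define <8 x i64> @test_8f64toub(<8 x double> %a, <8 x i64> %passthru) {
  %mask = fptoui <8 x double> %a to <8 x i1>
  %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer
  ret <8 x i64> %select
}
```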
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-cvt.ll | 3521 |
2 files changed, 137 insertions, 3417 deletions
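
Promoting the result type means the legalizer performs the conversion at the wider vXi32 type and then truncates back down to vXi1, so the regenerated checks below collapse to a packed convert followed by a low-bit mask test (vpslld $31 + vptestmd) instead of per-element vcvttsd2si/vcvttss2si and mask-register shuffling. A rough IR-level sketch of what the promoted unsigned lowering computes (illustrative only; the actual transform happens on SelectionDAG nodes, and the function name is made up for the example):

```llvm
; Illustrative sketch of the promoted lowering for an unsigned v8f64 -> v8i1 conversion.
define <8 x i1> @promoted_fptoui_sketch(<8 x double> %a) {
  ; Convert at the promoted vXi32 type (vcvttpd2dq / vcvttpd2udq in the new checks) ...
  %wide = fptoui <8 x double> %a to <8 x i32>
  ; ... then narrow back down to the i1 mask (the vpslld $31 + vptestmd pair).
  %mask = trunc <8 x i32> %wide to <8 x i1>
  ret <8 x i1> %mask
}
```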
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 58a48985bfb..304ff21bb38 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1160,6 +1160,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
 
+    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
+    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
+    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
+    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
+    setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
+    setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
+    if (Subtarget.hasVLX()) {
+      setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
+      setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
+    }
+
     // Extends of v16i1/v8i1 to 128-bit vectors.
     setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom);
@@ -16671,9 +16682,29 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
   if (VT.isVector()) {
-    assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
     SDValue Src = Op.getOperand(0);
     SDLoc dl(Op);
+
+    if (VT == MVT::v2i1 && Src.getSimpleValueType() == MVT::v2f64) {
+      MVT ResVT = MVT::v4i32;
+      MVT TruncVT = MVT::v4i1;
+      unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+      if (!IsSigned && !Subtarget.hasVLX()) {
+        // Widen to 512-bits.
+        ResVT = MVT::v8i32;
+        TruncVT = MVT::v8i1;
+        Opc = ISD::FP_TO_UINT;
+        Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64,
+                          DAG.getUNDEF(MVT::v8f64),
+                          Src, DAG.getIntPtrConstant(0, dl));
+      }
+      SDValue Res = DAG.getNode(Opc, dl, ResVT, Src);
+      Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
+      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
+                         DAG.getIntPtrConstant(0, dl));
+    }
+
+    assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
     if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
       return DAG.getNode(IsSigned ?
                         X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl, VT,
                         DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index c657b10fbf2..df66935b75b 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -2034,43 +2034,13 @@ define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) {
 ; NOVLDQ-NEXT: vzeroupper
 ; NOVLDQ-NEXT: retq
 ;
-; VLBW-LABEL: test_2f64toub:
-; VLBW: # %bb.0:
-; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; VLBW-NEXT: vcvttsd2si %xmm2, %eax
-; VLBW-NEXT: kmovd %eax, %k0
-; VLBW-NEXT: vcvttsd2si %xmm0, %eax
-; VLBW-NEXT: andl $1, %eax
-; VLBW-NEXT: kmovw %eax, %k1
-; VLBW-NEXT: kshiftrw $1, %k0, %k2
-; VLBW-NEXT: kshiftlw $1, %k2, %k2
-; VLBW-NEXT: korw %k1, %k2, %k1
-; VLBW-NEXT: kshiftrw $1, %k1, %k2
-; VLBW-NEXT: kxorw %k0, %k2, %k0
-; VLBW-NEXT: kshiftlw $15, %k0, %k0
-; VLBW-NEXT: kshiftrw $14, %k0, %k0
-; VLBW-NEXT: kxorw %k1, %k0, %k1
-; VLBW-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
-; VLBW-NEXT: retq
-;
-; VLNOBW-LABEL: test_2f64toub:
-; VLNOBW: # %bb.0:
-; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; VLNOBW-NEXT: vcvttsd2si %xmm2, %eax
-; VLNOBW-NEXT: kmovw %eax, %k0
-; VLNOBW-NEXT: vcvttsd2si %xmm0, %eax
-; VLNOBW-NEXT: andl $1, %eax
-; VLNOBW-NEXT: kmovw %eax, %k1
-; VLNOBW-NEXT: kshiftrw $1, %k0, %k2
-; VLNOBW-NEXT: kshiftlw $1, %k2, %k2
-; VLNOBW-NEXT: korw %k1, %k2, %k1
-; VLNOBW-NEXT: kshiftrw $1, %k1, %k2
-; VLNOBW-NEXT: kxorw %k0, %k2, %k0
-; VLNOBW-NEXT: kshiftlw $15, %k0, %k0
-; VLNOBW-NEXT: kshiftrw $14, %k0, %k0
-; VLNOBW-NEXT: kxorw %k1, %k0, %k1
-; VLNOBW-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
-; VLNOBW-NEXT: retq
+; VL-LABEL: test_2f64toub:
+; VL: # %bb.0:
+; VL-NEXT: vcvttpd2udq %xmm0, %xmm0
+; VL-NEXT: vpslld $31, %xmm0, %xmm0
+; VL-NEXT: vptestmd %xmm0, %xmm0, %k1
+; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z}
+; VL-NEXT: retq
 ;
 ; AVX512DQ-LABEL: test_2f64toub:
 ; AVX512DQ: # %bb.0:
@@ -2097,534 +2067,34 @@ define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) {
 ; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
 ; NOVL-NEXT: retq
 ;
-; VLBW-LABEL: test_4f64toub:
-; VLBW: # %bb.0:
-; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; VLBW-NEXT: vcvttsd2si %xmm2, %eax
-; VLBW-NEXT: kmovd %eax, %k0
-; VLBW-NEXT: vcvttsd2si %xmm0, %eax
-; VLBW-NEXT: andl $1, %eax
-; VLBW-NEXT: kmovw %eax, %k1
-; VLBW-NEXT: kshiftrw $1, %k0, %k2
-; VLBW-NEXT: kshiftlw $1, %k2, %k2
-; VLBW-NEXT: korw %k1, %k2, %k1
-; VLBW-NEXT: kshiftrw $1, %k1, %k2
-; VLBW-NEXT: kxorw %k0, %k2, %k0
-; VLBW-NEXT: kshiftlw $15, %k0, %k0
-; VLBW-NEXT: kshiftrw $14, %k0, %k0
-; VLBW-NEXT: kxorw %k1, %k0, %k0
-; VLBW-NEXT: kshiftrw $2, %k0, %k1
-; VLBW-NEXT: vextractf128 $1, %ymm0, %xmm0
-; VLBW-NEXT: vcvttsd2si %xmm0, %eax
-; VLBW-NEXT: kmovd %eax, %k2
-; VLBW-NEXT: kxorw %k2, %k1, %k1
-; VLBW-NEXT: kshiftlw $15, %k1, %k1
-; VLBW-NEXT: kshiftrw $13, %k1, %k1
-; VLBW-NEXT: kxorw %k0, %k1, %k0
-; VLBW-NEXT: kshiftrw $3, %k0, %k1
-; VLBW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; VLBW-NEXT: vcvttsd2si %xmm0, %eax
-; VLBW-NEXT: kmovd %eax, %k2
-; VLBW-NEXT: kxorw %k2, %k1, %k1
-; VLBW-NEXT: kshiftlw $15, %k1, %k1
-; VLBW-NEXT: kshiftrw $12, %k1, %k1
-; VLBW-NEXT: kxorw %k0, %k1, %k1
-; VLBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z}
-; VLBW-NEXT: retq
-;
-; VLNOBW-LABEL: test_4f64toub:
-; VLNOBW: # %bb.0:
-; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; VLNOBW-NEXT: vcvttsd2si %xmm2, %eax
-; VLNOBW-NEXT: kmovw %eax, %k0
-; VLNOBW-NEXT: vcvttsd2si %xmm0, %eax
-;
VLNOBW-NEXT: andl $1, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k0, %k2 -; VLNOBW-NEXT: kshiftlw $1, %k2, %k2 -; VLNOBW-NEXT: korw %k1, %k2, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k2 -; VLNOBW-NEXT: kxorw %k0, %k2, %k0 -; VLNOBW-NEXT: kshiftlw $15, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k0 -; VLNOBW-NEXT: kxorw %k1, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $2, %k0, %k1 -; VLNOBW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; VLNOBW-NEXT: vcvttsd2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $13, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $3, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; VLNOBW-NEXT: vcvttsd2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $12, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k1 -; VLNOBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VLNOBW-NEXT: retq +; VL-LABEL: test_4f64toub: +; VL: # %bb.0: +; VL-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VL-NEXT: vpslld $31, %xmm0, %xmm0 +; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VL-NEXT: retq %mask = fptoui <4 x double> %a to <4 x i1> %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer ret <4 x i64> %select } define <8 x i64> @test_8f64toub(<8 x double> %a, <8 x i64> %passthru) { -; KNL-LABEL: test_8f64toub: -; KNL: # %bb.0: -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: vcvttsd2si %xmm0, %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftrw $1, %k0, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k1 -; KNL-NEXT: vextractf128 $1, %ymm0, %xmm2 -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k1 -; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k1 -; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; KNL-NEXT: vcvttsd2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; KNL-NEXT: vcvttsd2si %xmm0, %eax -; 
KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k1 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: test_8f64toub: -; SKX: # %bb.0: -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: vcvttsd2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k2 -; SKX-NEXT: kxorb %k0, %k2, %k0 -; SKX-NEXT: kshiftlb $7, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k0 -; SKX-NEXT: kxorb %k1, %k0, %k0 -; SKX-NEXT: kshiftrb $2, %k0, %k1 -; SKX-NEXT: vextractf128 $1, %ymm0, %xmm2 -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $3, %k0, %k1 -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $4, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $4, %k0, %k1 -; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $3, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $5, %k0, %k1 -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $2, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 -; SKX-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; SKX-NEXT: vcvttsd2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftlb $1, %k0, %k0 -; SKX-NEXT: kshiftrb $1, %k0, %k0 -; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; SKX-NEXT: vcvttsd2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: korb %k1, %k0, %k1 -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; SKX-NEXT: retq -; -; AVX512VL-LABEL: test_8f64toub: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k0 -; AVX512VL-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VL-NEXT: andl $1, %eax -; AVX512VL-NEXT: kmovw %eax, %k1 -; AVX512VL-NEXT: kshiftrw $1, %k0, %k2 -; AVX512VL-NEXT: kshiftlw $1, %k2, %k2 -; AVX512VL-NEXT: korw %k1, %k2, %k1 -; AVX512VL-NEXT: kshiftrw $1, %k1, %k2 -; AVX512VL-NEXT: kxorw %k0, %k2, %k0 -; AVX512VL-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VL-NEXT: kshiftrw $14, %k0, %k0 -; AVX512VL-NEXT: kxorw %k1, %k0, %k0 -; AVX512VL-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: 
kshiftrw $12, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $4, %k0, %k1 -; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $11, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $5, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $10, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $6, %k0, %k1 -; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VL-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $9, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $7, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $8, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k1 -; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VL-NEXT: retq -; -; AVX512DQ-LABEL: test_8f64toub: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftrb $1, %k1, %k2 -; AVX512DQ-NEXT: kxorb %k0, %k2, %k0 -; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $6, %k0, %k0 -; AVX512DQ-NEXT: kxorb %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $4, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $4, %k0, %k1 -; AVX512DQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $3, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $5, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $2, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512DQ-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 -; 
AVX512DQ-NEXT: kshiftrb $1, %k0, %k0 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: korb %k1, %k0, %k1 -; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512DQ-NEXT: retq -; -; AVX512BW-LABEL: test_8f64toub: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k0 -; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512BW-NEXT: andl $1, %eax -; AVX512BW-NEXT: kmovw %eax, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512BW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512BW-NEXT: korw %k1, %k2, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kxorw %k0, %k2, %k0 -; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k1 -; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VLDQ-LABEL: test_8f64toub: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k0 -; AVX512VLDQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k1 -; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k2 -; AVX512VLDQ-NEXT: kxorb %k0, %k2, %k0 -; AVX512VLDQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k0 -; AVX512VLDQ-NEXT: kxorb %k1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512VLDQ-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512VLDQ-NEXT: vcvttsd2si 
%xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $4, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $4, %k0, %k1 -; AVX512VLDQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $3, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $5, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $2, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512VLDQ-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VLDQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $1, %k0, %k0 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VLDQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: korb %k1, %k0, %k1 -; AVX512VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VLDQ-NEXT: retq +; NOVL-LABEL: test_8f64toub: +; NOVL: # %bb.0: +; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; NOVL-NEXT: vpslld $31, %ymm0, %ymm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: retq ; -; AVX512VLBW-LABEL: test_8f64toub: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k0 -; AVX512VLBW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLBW-NEXT: andl $1, %eax -; AVX512VLBW-NEXT: kmovw %eax, %k1 -; AVX512VLBW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512VLBW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512VLBW-NEXT: korw %k1, %k2, %k1 -; AVX512VLBW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512VLBW-NEXT: kxorw %k0, %k2, %k0 -; AVX512VLBW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512VLBW-NEXT: kxorw %k1, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VLBW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $4, %k0, %k1 -; 
AVX512VLBW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512VLBW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VLBW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VLBW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k1 -; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VLBW-NEXT: retq +; VL-LABEL: test_8f64toub: +; VL: # %bb.0: +; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; VL-NEXT: vpslld $31, %ymm0, %ymm0 +; VL-NEXT: vptestmd %ymm0, %ymm0, %k1 +; VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VL-NEXT: retq %mask = fptoui <8 x double> %a to <8 x i1> %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer ret <8 x i64> %select @@ -2698,1169 +2168,47 @@ define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) { ; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; NOVL-NEXT: retq ; -; VLBW-LABEL: test_4f32toub: -; VLBW: # %bb.0: -; VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k0 -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: andl $1, %eax -; VLBW-NEXT: kmovw %eax, %k1 -; VLBW-NEXT: kshiftrw $1, %k0, %k2 -; VLBW-NEXT: kshiftlw $1, %k2, %k2 -; VLBW-NEXT: korw %k1, %k2, %k1 -; VLBW-NEXT: kshiftrw $1, %k1, %k2 -; VLBW-NEXT: kxorw %k0, %k2, %k0 -; VLBW-NEXT: kshiftlw $15, %k0, %k0 -; VLBW-NEXT: kshiftrw $14, %k0, %k0 -; VLBW-NEXT: kxorw %k1, %k0, %k0 -; VLBW-NEXT: kshiftrw $2, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $13, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $3, %k0, %k1 -; VLBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $12, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k1 -; VLBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VLBW-NEXT: retq -; -; VLNOBW-LABEL: test_4f32toub: -; VLNOBW: # %bb.0: -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k0 -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: andl $1, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k0, %k2 -; VLNOBW-NEXT: kshiftlw $1, %k2, %k2 -; VLNOBW-NEXT: korw %k1, %k2, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k2 -; 
VLNOBW-NEXT: kxorw %k0, %k2, %k0 -; VLNOBW-NEXT: kshiftlw $15, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k0 -; VLNOBW-NEXT: kxorw %k1, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $2, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $13, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $3, %k0, %k1 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $12, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k1 -; VLNOBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VLNOBW-NEXT: retq +; VL-LABEL: test_4f32toub: +; VL: # %bb.0: +; VL-NEXT: vcvttps2dq %xmm0, %xmm0 +; VL-NEXT: vpslld $31, %xmm0, %xmm0 +; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VL-NEXT: retq %mask = fptoui <4 x float> %a to <4 x i1> %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer ret <4 x i64> %select } define <8 x i64> @test_8f32toub(<8 x float> %a, <8 x i64> %passthru) { -; KNL-LABEL: test_8f32toub: -; KNL: # %bb.0: -; KNL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftrw $1, %k0, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k1 -; KNL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k1 -; KNL-NEXT: 
vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: test_8f32toub: -; SKX: # %bb.0: -; SKX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: vcvttss2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k2 -; SKX-NEXT: kxorb %k0, %k2, %k0 -; SKX-NEXT: kshiftlb $7, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k0 -; SKX-NEXT: kxorb %k1, %k0, %k0 -; SKX-NEXT: kshiftrb $2, %k0, %k1 -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $3, %k0, %k1 -; SKX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $4, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $4, %k0, %k1 -; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 -; SKX-NEXT: vcvttss2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $3, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $5, %k0, %k1 -; SKX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $2, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftlb $1, %k0, %k0 -; SKX-NEXT: kshiftrb $1, %k0, %k0 -; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SKX-NEXT: vcvttss2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: korb %k1, %k0, %k1 -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; SKX-NEXT: retq -; -; AVX512VL-LABEL: test_8f32toub: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k0 -; AVX512VL-NEXT: vcvttss2si %xmm0, %eax -; AVX512VL-NEXT: andl $1, %eax -; AVX512VL-NEXT: kmovw %eax, %k1 -; AVX512VL-NEXT: kshiftrw $1, %k0, %k2 -; AVX512VL-NEXT: kshiftlw $1, %k2, %k2 -; AVX512VL-NEXT: korw %k1, %k2, %k1 -; AVX512VL-NEXT: kshiftrw $1, %k1, %k2 -; AVX512VL-NEXT: kxorw %k0, %k2, %k0 -; AVX512VL-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VL-NEXT: kshiftrw $14, %k0, %k0 -; AVX512VL-NEXT: kxorw %k1, %k0, %k0 -; AVX512VL-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $12, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $4, %k0, %k1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, 
%xmm0 -; AVX512VL-NEXT: vcvttss2si %xmm0, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $11, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $5, %k0, %k1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $10, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $6, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $9, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $7, %k0, %k1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vcvttss2si %xmm0, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $8, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k1 -; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VL-NEXT: retq -; -; AVX512DQ-LABEL: test_8f32toub: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftrb $1, %k1, %k2 -; AVX512DQ-NEXT: kxorb %k0, %k2, %k0 -; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $6, %k0, %k0 -; AVX512DQ-NEXT: kxorb %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $4, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $4, %k0, %k1 -; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $3, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $5, %k0, %k1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $2, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: 
vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: korb %k1, %k0, %k1 -; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512DQ-NEXT: retq -; -; AVX512BW-LABEL: test_8f32toub: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k0 -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: andl $1, %eax -; AVX512BW-NEXT: kmovw %eax, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512BW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512BW-NEXT: korw %k1, %k2, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kxorw %k0, %k2, %k0 -; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k1 -; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VLDQ-LABEL: test_8f32toub: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k0 -; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k1 -; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k2 -; AVX512VLDQ-NEXT: kxorb %k0, %k2, %k0 -; AVX512VLDQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k0 -; AVX512VLDQ-NEXT: kxorb %k1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb 
%k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $4, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $4, %k0, %k1 -; AVX512VLDQ-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $3, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $5, %k0, %k1 -; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $2, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $1, %k0, %k0 -; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: korb %k1, %k0, %k1 -; AVX512VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VLDQ-NEXT: retq +; NOVL-LABEL: test_8f32toub: +; NOVL: # %bb.0: +; NOVL-NEXT: vcvttps2dq %ymm0, %ymm0 +; NOVL-NEXT: vpslld $31, %ymm0, %ymm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: retq ; -; AVX512VLBW-LABEL: test_8f32toub: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k0 -; AVX512VLBW-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLBW-NEXT: andl $1, %eax -; AVX512VLBW-NEXT: kmovw %eax, %k1 -; AVX512VLBW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512VLBW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512VLBW-NEXT: korw %k1, %k2, %k1 -; AVX512VLBW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512VLBW-NEXT: kxorw %k0, %k2, %k0 -; AVX512VLBW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512VLBW-NEXT: kxorw %k1, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VLBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512VLBW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; 
AVX512VLBW-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512VLBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VLBW-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k1 -; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VLBW-NEXT: retq +; VL-LABEL: test_8f32toub: +; VL: # %bb.0: +; VL-NEXT: vcvttps2dq %ymm0, %ymm0 +; VL-NEXT: vpslld $31, %ymm0, %ymm0 +; VL-NEXT: vptestmd %ymm0, %ymm0, %k1 +; VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VL-NEXT: retq %mask = fptoui <8 x float> %a to <8 x i1> %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer ret <8 x i64> %select } define <16 x i32> @test_16f32toub(<16 x float> %a, <16 x i32> %passthru) { -; KNL-LABEL: test_16f32toub: -; KNL: # %bb.0: -; KNL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k1 -; KNL-NEXT: vextractf128 $1, %ymm0, %xmm2 -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm3, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; KNL-NEXT: vcvttss2si %xmm3, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw 
$15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k1 -; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $7, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm3, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $6, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; KNL-NEXT: vcvttss2si %xmm3, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $5, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $4, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k1 -; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $3, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $2, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; KNL-NEXT: retq -; -; VLBW-LABEL: test_16f32toub: -; VLBW: # %bb.0: -; VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k0 -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k1 -; VLBW-NEXT: kshiftrw $1, %k1, %k2 -; VLBW-NEXT: kxorw %k0, %k2, %k0 -; VLBW-NEXT: kshiftlw $15, %k0, %k0 -; VLBW-NEXT: kshiftrw $14, %k0, %k0 -; VLBW-NEXT: kxorw %k1, %k0, %k0 -; VLBW-NEXT: kshiftrw $2, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $13, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $3, %k0, %k1 -; VLBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: 
kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $12, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $4, %k0, %k1 -; VLBW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $11, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $5, %k0, %k1 -; VLBW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm3, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $10, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $6, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; VLBW-NEXT: vcvttss2si %xmm3, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $9, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $7, %k0, %k1 -; VLBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $8, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $8, %k0, %k1 -; VLBW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $7, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $9, %k0, %k1 -; VLBW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm3, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $6, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $10, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; VLBW-NEXT: vcvttss2si %xmm3, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $5, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $11, %k0, %k1 -; VLBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $4, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $12, %k0, %k1 -; VLBW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $3, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $13, %k0, %k1 -; VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $2, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $14, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $1, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftlw $1, %k0, %k0 -; VLBW-NEXT: kshiftrw $1, %k0, %k0 -; VLBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; VLBW-NEXT: vcvttss2si 
%xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: korw %k1, %k0, %k1 -; VLBW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; VLBW-NEXT: retq -; -; VLNOBW-LABEL: test_16f32toub: -; VLNOBW: # %bb.0: -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k0 -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k2 -; VLNOBW-NEXT: kxorw %k0, %k2, %k0 -; VLNOBW-NEXT: kshiftlw $15, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k0 -; VLNOBW-NEXT: kxorw %k1, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $2, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $13, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $3, %k0, %k1 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $12, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $4, %k0, %k1 -; VLNOBW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $11, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $5, %k0, %k1 -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm3, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $10, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $6, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm3, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $9, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $7, %k0, %k1 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $8, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $8, %k0, %k1 -; VLNOBW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $7, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $9, %k0, %k1 -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm3, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $6, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $10, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm3, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $5, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $11, %k0, %k1 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax 
-; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $4, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $12, %k0, %k1 -; VLNOBW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $3, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $13, %k0, %k1 -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $2, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftlw $1, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $1, %k0, %k0 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: korw %k1, %k0, %k1 -; VLNOBW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; VLNOBW-NEXT: retq -; -; AVX512DQ-LABEL: test_16f32toub: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2 -; AVX512DQ-NEXT: kxorw %k0, %k2, %k0 -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 -; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $2, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $3, %k0, %k1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $4, %k0, %k1 -; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $5, %k0, %k1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm3, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $6, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm3, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, 
%k1, %k0 -; AVX512DQ-NEXT: kshiftrw $7, %k0, %k1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $8, %k0, %k1 -; AVX512DQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $9, %k0, %k1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm3, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $10, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm3, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $11, %k0, %k1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1 -; AVX512DQ-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $13, %k0, %k1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k1 -; AVX512DQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; AVX512DQ-NEXT: retq -; -; AVX512BW-LABEL: test_16f32toub: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k0 -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kxorw %k0, %k2, %k0 -; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] 
-; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm3, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm3, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $8, %k0, %k1 -; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $7, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $9, %k0, %k1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm3, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $6, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $10, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm3, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $5, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $11, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $4, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $12, %k0, %k1 -; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $3, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $13, %k0, %k1 -; 
AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $2, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftlw $1, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $1, %k0, %k0 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: korw %k1, %k0, %k1 -; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq +; ALL-LABEL: test_16f32toub: +; ALL: # %bb.0: +; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 +; ALL-NEXT: vpslld $31, %zmm0, %zmm0 +; ALL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; ALL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; ALL-NEXT: retq %mask = fptoui <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer ret <16 x i32> %select @@ -3878,43 +2226,13 @@ define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) { ; NOVLDQ-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NOVLDQ-NEXT: retq ; -; VLBW-LABEL: test_2f64tosb: -; VLBW: # %bb.0: -; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLBW-NEXT: vcvttsd2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k0 -; VLBW-NEXT: vcvttsd2si %xmm0, %eax -; VLBW-NEXT: andl $1, %eax -; VLBW-NEXT: kmovw %eax, %k1 -; VLBW-NEXT: kshiftrw $1, %k0, %k2 -; VLBW-NEXT: kshiftlw $1, %k2, %k2 -; VLBW-NEXT: korw %k1, %k2, %k1 -; VLBW-NEXT: kshiftrw $1, %k1, %k2 -; VLBW-NEXT: kxorw %k0, %k2, %k0 -; VLBW-NEXT: kshiftlw $15, %k0, %k0 -; VLBW-NEXT: kshiftrw $14, %k0, %k0 -; VLBW-NEXT: kxorw %k1, %k0, %k1 -; VLBW-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} -; VLBW-NEXT: retq -; -; VLNOBW-LABEL: test_2f64tosb: -; VLNOBW: # %bb.0: -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLNOBW-NEXT: vcvttsd2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k0 -; VLNOBW-NEXT: vcvttsd2si %xmm0, %eax -; VLNOBW-NEXT: andl $1, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k0, %k2 -; VLNOBW-NEXT: kshiftlw $1, %k2, %k2 -; VLNOBW-NEXT: korw %k1, %k2, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k2 -; VLNOBW-NEXT: kxorw %k0, %k2, %k0 -; VLNOBW-NEXT: kshiftlw $15, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k0 -; VLNOBW-NEXT: kxorw %k1, %k0, %k1 -; VLNOBW-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} -; VLNOBW-NEXT: retq +; VL-LABEL: test_2f64tosb: +; VL: # %bb.0: +; VL-NEXT: vcvttpd2dq %xmm0, %xmm0 +; VL-NEXT: vpslld $31, %xmm0, %xmm0 +; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VL-NEXT: retq ; ; AVX512DQ-LABEL: test_2f64tosb: ; AVX512DQ: # %bb.0: @@ -3936,534 +2254,31 @@ define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) { ; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; NOVL-NEXT: retq ; -; VLBW-LABEL: test_4f64tosb: -; VLBW: # %bb.0: -; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLBW-NEXT: vcvttsd2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k0 -; VLBW-NEXT: vcvttsd2si %xmm0, %eax -; VLBW-NEXT: andl $1, %eax -; VLBW-NEXT: kmovw %eax, %k1 -; VLBW-NEXT: kshiftrw $1, %k0, %k2 -; VLBW-NEXT: kshiftlw $1, %k2, %k2 -; VLBW-NEXT: korw 
%k1, %k2, %k1 -; VLBW-NEXT: kshiftrw $1, %k1, %k2 -; VLBW-NEXT: kxorw %k0, %k2, %k0 -; VLBW-NEXT: kshiftlw $15, %k0, %k0 -; VLBW-NEXT: kshiftrw $14, %k0, %k0 -; VLBW-NEXT: kxorw %k1, %k0, %k0 -; VLBW-NEXT: kshiftrw $2, %k0, %k1 -; VLBW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; VLBW-NEXT: vcvttsd2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $13, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $3, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; VLBW-NEXT: vcvttsd2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $12, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k1 -; VLBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VLBW-NEXT: retq -; -; VLNOBW-LABEL: test_4f64tosb: -; VLNOBW: # %bb.0: -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLNOBW-NEXT: vcvttsd2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k0 -; VLNOBW-NEXT: vcvttsd2si %xmm0, %eax -; VLNOBW-NEXT: andl $1, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k0, %k2 -; VLNOBW-NEXT: kshiftlw $1, %k2, %k2 -; VLNOBW-NEXT: korw %k1, %k2, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k2 -; VLNOBW-NEXT: kxorw %k0, %k2, %k0 -; VLNOBW-NEXT: kshiftlw $15, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k0 -; VLNOBW-NEXT: kxorw %k1, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $2, %k0, %k1 -; VLNOBW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; VLNOBW-NEXT: vcvttsd2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $13, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $3, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; VLNOBW-NEXT: vcvttsd2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $12, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k1 -; VLNOBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VLNOBW-NEXT: retq +; VL-LABEL: test_4f64tosb: +; VL: # %bb.0: +; VL-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VL-NEXT: retq %mask = fptosi <4 x double> %a to <4 x i1> %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer ret <4 x i64> %select } define <8 x i64> @test_8f64tosb(<8 x double> %a, <8 x i64> %passthru) { -; KNL-LABEL: test_8f64tosb: -; KNL: # %bb.0: -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: vcvttsd2si %xmm0, %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftrw $1, %k0, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k1 -; KNL-NEXT: vextractf128 $1, %ymm0, %xmm2 -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw 
$12, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k1 -; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; KNL-NEXT: vcvttsd2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k1 -; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; KNL-NEXT: vcvttsd2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; KNL-NEXT: vcvttsd2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k1 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: test_8f64tosb: -; SKX: # %bb.0: -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: vcvttsd2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k2 -; SKX-NEXT: kxorb %k0, %k2, %k0 -; SKX-NEXT: kshiftlb $7, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k0 -; SKX-NEXT: kxorb %k1, %k0, %k0 -; SKX-NEXT: kshiftrb $2, %k0, %k1 -; SKX-NEXT: vextractf128 $1, %ymm0, %xmm2 -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $3, %k0, %k1 -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $4, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $4, %k0, %k1 -; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $3, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $5, %k0, %k1 -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; SKX-NEXT: vcvttsd2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $2, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 -; SKX-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; SKX-NEXT: vcvttsd2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftlb $1, %k0, %k0 -; SKX-NEXT: kshiftrb $1, %k0, %k0 -; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; SKX-NEXT: vcvttsd2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: korb %k1, %k0, %k1 -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; SKX-NEXT: retq -; -; AVX512VL-LABEL: test_8f64tosb: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, 
%k0 -; AVX512VL-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VL-NEXT: andl $1, %eax -; AVX512VL-NEXT: kmovw %eax, %k1 -; AVX512VL-NEXT: kshiftrw $1, %k0, %k2 -; AVX512VL-NEXT: kshiftlw $1, %k2, %k2 -; AVX512VL-NEXT: korw %k1, %k2, %k1 -; AVX512VL-NEXT: kshiftrw $1, %k1, %k2 -; AVX512VL-NEXT: kxorw %k0, %k2, %k0 -; AVX512VL-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VL-NEXT: kshiftrw $14, %k0, %k0 -; AVX512VL-NEXT: kxorw %k1, %k0, %k0 -; AVX512VL-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $12, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $4, %k0, %k1 -; AVX512VL-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $11, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $5, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VL-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $10, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $6, %k0, %k1 -; AVX512VL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VL-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $9, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $7, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $8, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k1 -; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VL-NEXT: retq -; -; AVX512DQ-LABEL: test_8f64tosb: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftrb $1, %k1, %k2 -; AVX512DQ-NEXT: kxorb %k0, %k2, %k0 -; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $6, %k0, %k0 -; AVX512DQ-NEXT: kxorb %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $4, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; 
AVX512DQ-NEXT: kshiftrb $4, %k0, %k1 -; AVX512DQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $3, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $5, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $2, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512DQ-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: korb %k1, %k0, %k1 -; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512DQ-NEXT: retq -; -; AVX512BW-LABEL: test_8f64tosb: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k0 -; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512BW-NEXT: andl $1, %eax -; AVX512BW-NEXT: kmovw %eax, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512BW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512BW-NEXT: korw %k1, %k2, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kxorw %k0, %k2, %k0 -; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512BW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; 
AVX512BW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512BW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k1 -; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VLDQ-LABEL: test_8f64tosb: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k0 -; AVX512VLDQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k1 -; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k2 -; AVX512VLDQ-NEXT: kxorb %k0, %k2, %k0 -; AVX512VLDQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k0 -; AVX512VLDQ-NEXT: kxorb %k1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512VLDQ-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $4, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $4, %k0, %k1 -; AVX512VLDQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $3, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $5, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VLDQ-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $2, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512VLDQ-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VLDQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $1, %k0, %k0 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VLDQ-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: korb %k1, %k0, %k1 -; AVX512VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VLDQ-NEXT: retq +; NOVL-LABEL: test_8f64tosb: +; NOVL: # %bb.0: +; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: retq ; -; AVX512VLBW-LABEL: test_8f64tosb: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k0 -; AVX512VLBW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLBW-NEXT: andl $1, %eax -; AVX512VLBW-NEXT: kmovw %eax, %k1 -; AVX512VLBW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512VLBW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512VLBW-NEXT: 
korw %k1, %k2, %k1 -; AVX512VLBW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512VLBW-NEXT: kxorw %k0, %k2, %k0 -; AVX512VLBW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512VLBW-NEXT: kxorw %k1, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VLBW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512VLBW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] -; AVX512VLBW-NEXT: vcvttsd2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512VLBW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512VLBW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VLBW-NEXT: vcvttsd2si %xmm0, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k1 -; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VLBW-NEXT: retq +; VL-LABEL: test_8f64tosb: +; VL: # %bb.0: +; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; VL-NEXT: vptestmd %ymm0, %ymm0, %k1 +; VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VL-NEXT: retq %mask = fptosi <8 x double> %a to <8 x i1> %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer ret <8 x i64> %select @@ -4531,1169 +2346,43 @@ define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) { ; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0 ; NOVL-NEXT: retq ; -; VLBW-LABEL: test_4f32tosb: -; VLBW: # %bb.0: -; VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k0 -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: andl $1, %eax -; VLBW-NEXT: kmovw %eax, %k1 -; VLBW-NEXT: kshiftrw $1, %k0, %k2 -; VLBW-NEXT: kshiftlw $1, %k2, %k2 -; VLBW-NEXT: korw %k1, %k2, %k1 -; VLBW-NEXT: kshiftrw $1, %k1, %k2 -; VLBW-NEXT: kxorw %k0, %k2, %k0 -; VLBW-NEXT: kshiftlw $15, %k0, %k0 -; VLBW-NEXT: kshiftrw $14, %k0, %k0 -; VLBW-NEXT: kxorw %k1, %k0, %k0 -; VLBW-NEXT: kshiftrw $2, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, 
%k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $13, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $3, %k0, %k1 -; VLBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $12, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k1 -; VLBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VLBW-NEXT: retq -; -; VLNOBW-LABEL: test_4f32tosb: -; VLNOBW: # %bb.0: -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k0 -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: andl $1, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k0, %k2 -; VLNOBW-NEXT: kshiftlw $1, %k2, %k2 -; VLNOBW-NEXT: korw %k1, %k2, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k2 -; VLNOBW-NEXT: kxorw %k0, %k2, %k0 -; VLNOBW-NEXT: kshiftlw $15, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k0 -; VLNOBW-NEXT: kxorw %k1, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $2, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $13, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $3, %k0, %k1 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $12, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k1 -; VLNOBW-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VLNOBW-NEXT: retq +; VL-LABEL: test_4f32tosb: +; VL: # %bb.0: +; VL-NEXT: vcvttps2dq %xmm0, %xmm0 +; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VL-NEXT: retq %mask = fptosi <4 x float> %a to <4 x i1> %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer ret <4 x i64> %select } define <8 x i64> @test_8f32tosb(<8 x float> %a, <8 x i64> %passthru) { -; KNL-LABEL: test_8f32tosb: -; KNL: # %bb.0: -; KNL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftrw $1, %k0, %k2 -; KNL-NEXT: kshiftlw $1, %k2, %k2 -; KNL-NEXT: korw %k1, %k2, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k1 -; KNL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; 
KNL-NEXT: kshiftrw $5, %k0, %k1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k1 -; KNL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; KNL-NEXT: retq -; -; SKX-LABEL: test_8f32tosb: -; SKX: # %bb.0: -; SKX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k0 -; SKX-NEXT: vcvttss2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k2 -; SKX-NEXT: kxorb %k0, %k2, %k0 -; SKX-NEXT: kshiftlb $7, %k0, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k0 -; SKX-NEXT: kxorb %k1, %k0, %k0 -; SKX-NEXT: kshiftrb $2, %k0, %k1 -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $5, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $3, %k0, %k1 -; SKX-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $4, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $4, %k0, %k1 -; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 -; SKX-NEXT: vcvttss2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $3, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $5, %k0, %k1 -; SKX-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $2, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftrb $6, %k0, %k1 -; SKX-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; SKX-NEXT: vcvttss2si %xmm2, %eax -; SKX-NEXT: kmovd %eax, %k2 -; SKX-NEXT: kxorb %k2, %k1, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: kshiftrb $1, %k1, %k1 -; SKX-NEXT: kxorb %k0, %k1, %k0 -; SKX-NEXT: kshiftlb $1, %k0, %k0 -; SKX-NEXT: kshiftrb $1, %k0, %k0 -; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; SKX-NEXT: vcvttss2si %xmm0, %eax -; SKX-NEXT: kmovd %eax, %k1 -; SKX-NEXT: kshiftlb $7, %k1, %k1 -; SKX-NEXT: korb %k1, %k0, %k1 -; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; SKX-NEXT: retq -; -; AVX512VL-LABEL: test_8f32tosb: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k0 -; AVX512VL-NEXT: vcvttss2si %xmm0, %eax -; AVX512VL-NEXT: andl $1, %eax -; AVX512VL-NEXT: kmovw %eax, %k1 -; AVX512VL-NEXT: kshiftrw $1, %k0, %k2 -; AVX512VL-NEXT: kshiftlw $1, %k2, %k2 -; AVX512VL-NEXT: korw %k1, %k2, %k1 -; AVX512VL-NEXT: kshiftrw $1, %k1, %k2 -; AVX512VL-NEXT: 
kxorw %k0, %k2, %k0 -; AVX512VL-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VL-NEXT: kshiftrw $14, %k0, %k0 -; AVX512VL-NEXT: kxorw %k1, %k0, %k0 -; AVX512VL-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $12, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $4, %k0, %k1 -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vcvttss2si %xmm0, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $11, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $5, %k0, %k1 -; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $10, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $6, %k0, %k1 -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VL-NEXT: vcvttss2si %xmm2, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $9, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k0 -; AVX512VL-NEXT: kshiftrw $7, %k0, %k1 -; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vcvttss2si %xmm0, %eax -; AVX512VL-NEXT: kmovw %eax, %k2 -; AVX512VL-NEXT: kxorw %k2, %k1, %k1 -; AVX512VL-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VL-NEXT: kshiftrw $8, %k1, %k1 -; AVX512VL-NEXT: kxorw %k0, %k1, %k1 -; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VL-NEXT: retq -; -; AVX512DQ-LABEL: test_8f32tosb: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftrb $1, %k1, %k2 -; AVX512DQ-NEXT: kxorb %k0, %k2, %k0 -; AVX512DQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $6, %k0, %k0 -; AVX512DQ-NEXT: kxorb %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $4, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $4, %k0, %k1 -; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; 
AVX512DQ-NEXT: kshiftrb $3, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $5, %k0, %k1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $2, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512DQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrb $1, %k0, %k0 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512DQ-NEXT: korb %k1, %k0, %k1 -; AVX512DQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512DQ-NEXT: retq -; -; AVX512BW-LABEL: test_8f32tosb: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k0 -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: andl $1, %eax -; AVX512BW-NEXT: kmovw %eax, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512BW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512BW-NEXT: korw %k1, %k2, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kxorw %k0, %k2, %k0 -; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k1 -; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; AVX512VLDQ-LABEL: test_8f32tosb: -; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k0 -; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k1 -; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k2 -; AVX512VLDQ-NEXT: kxorb %k0, %k2, %k0 -; AVX512VLDQ-NEXT: kshiftlb $7, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k0 -; AVX512VLDQ-NEXT: kxorb %k1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $2, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $5, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $3, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $4, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $4, %k0, %k1 -; AVX512VLDQ-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $3, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $5, %k0, %k1 -; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $2, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftrb $6, %k0, %k1 -; AVX512VLDQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLDQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k2 -; AVX512VLDQ-NEXT: kxorb %k2, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: kshiftrb $1, %k1, %k1 -; AVX512VLDQ-NEXT: kxorb %k0, %k1, %k0 -; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0 -; AVX512VLDQ-NEXT: kshiftrb $1, %k0, %k0 -; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLDQ-NEXT: kmovw %eax, %k1 -; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 -; AVX512VLDQ-NEXT: korb %k1, %k0, %k1 -; AVX512VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VLDQ-NEXT: retq +; NOVL-LABEL: test_8f32tosb: +; NOVL: # %bb.0: +; NOVL-NEXT: vcvttps2dq %ymm0, %ymm0 +; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVL-NEXT: retq ; -; AVX512VLBW-LABEL: test_8f32tosb: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k0 -; AVX512VLBW-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLBW-NEXT: andl $1, %eax -; AVX512VLBW-NEXT: kmovw %eax, %k1 -; AVX512VLBW-NEXT: kshiftrw $1, %k0, %k2 -; AVX512VLBW-NEXT: kshiftlw $1, %k2, %k2 -; AVX512VLBW-NEXT: korw %k1, %k2, %k1 -; AVX512VLBW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512VLBW-NEXT: kxorw %k0, %k2, %k0 -; AVX512VLBW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw 
$14, %k0, %k0 -; AVX512VLBW-NEXT: kxorw %k1, %k0, %k0 -; AVX512VLBW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512VLBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512VLBW-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VLBW-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512VLBW-NEXT: vcvttss2si %xmm2, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k0 -; AVX512VLBW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512VLBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VLBW-NEXT: vcvttss2si %xmm0, %eax -; AVX512VLBW-NEXT: kmovd %eax, %k2 -; AVX512VLBW-NEXT: kxorw %k2, %k1, %k1 -; AVX512VLBW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512VLBW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512VLBW-NEXT: kxorw %k0, %k1, %k1 -; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; AVX512VLBW-NEXT: retq +; VL-LABEL: test_8f32tosb: +; VL: # %bb.0: +; VL-NEXT: vcvttps2dq %ymm0, %ymm0 +; VL-NEXT: vptestmd %ymm0, %ymm0, %k1 +; VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VL-NEXT: retq %mask = fptosi <8 x float> %a to <8 x i1> %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer ret <8 x i64> %select } define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) { -; KNL-LABEL: test_16f32tosb: -; KNL: # %bb.0: -; KNL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kxorw %k0, %k2, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k0 -; KNL-NEXT: kxorw %k1, %k0, %k0 -; KNL-NEXT: kshiftrw $2, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $13, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $3, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $12, %k1, %k1 -; 
KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $4, %k0, %k1 -; KNL-NEXT: vextractf128 $1, %ymm0, %xmm2 -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $11, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $5, %k0, %k1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm3, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $10, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $6, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; KNL-NEXT: vcvttss2si %xmm3, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $9, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $7, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $8, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $8, %k0, %k1 -; KNL-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $7, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $9, %k0, %k1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm3, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $6, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $10, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; KNL-NEXT: vcvttss2si %xmm3, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $5, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $11, %k0, %k1 -; KNL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $4, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $12, %k0, %k1 -; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $3, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $13, %k0, %k1 -; KNL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $2, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftrw $14, %k0, %k1 -; KNL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; KNL-NEXT: vcvttss2si %xmm2, %eax -; KNL-NEXT: kmovw %eax, %k2 -; KNL-NEXT: kxorw %k2, %k1, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: kxorw %k0, %k1, %k0 -; KNL-NEXT: kshiftlw $1, %k0, %k0 -; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; KNL-NEXT: vcvttss2si %xmm0, %eax -; KNL-NEXT: kmovw %eax, %k1 -; KNL-NEXT: kshiftlw $15, %k1, %k1 -; KNL-NEXT: korw %k1, %k0, %k1 -; KNL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; KNL-NEXT: retq -; -; VLBW-LABEL: test_16f32tosb: -; VLBW: # 
%bb.0: -; VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k0 -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k1 -; VLBW-NEXT: kshiftrw $1, %k1, %k2 -; VLBW-NEXT: kxorw %k0, %k2, %k0 -; VLBW-NEXT: kshiftlw $15, %k0, %k0 -; VLBW-NEXT: kshiftrw $14, %k0, %k0 -; VLBW-NEXT: kxorw %k1, %k0, %k0 -; VLBW-NEXT: kshiftrw $2, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $13, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $3, %k0, %k1 -; VLBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $12, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $4, %k0, %k1 -; VLBW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $11, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $5, %k0, %k1 -; VLBW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm3, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $10, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $6, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; VLBW-NEXT: vcvttss2si %xmm3, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $9, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $7, %k0, %k1 -; VLBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $8, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $8, %k0, %k1 -; VLBW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $7, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $9, %k0, %k1 -; VLBW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm3, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $6, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $10, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; VLBW-NEXT: vcvttss2si %xmm3, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $5, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $11, %k0, %k1 -; VLBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $4, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $12, %k0, %k1 -; VLBW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; 
VLBW-NEXT: kshiftrw $3, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $13, %k0, %k1 -; VLBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $2, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftrw $14, %k0, %k1 -; VLBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLBW-NEXT: vcvttss2si %xmm2, %eax -; VLBW-NEXT: kmovd %eax, %k2 -; VLBW-NEXT: kxorw %k2, %k1, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: kshiftrw $1, %k1, %k1 -; VLBW-NEXT: kxorw %k0, %k1, %k0 -; VLBW-NEXT: kshiftlw $1, %k0, %k0 -; VLBW-NEXT: kshiftrw $1, %k0, %k0 -; VLBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; VLBW-NEXT: vcvttss2si %xmm0, %eax -; VLBW-NEXT: kmovd %eax, %k1 -; VLBW-NEXT: kshiftlw $15, %k1, %k1 -; VLBW-NEXT: korw %k1, %k0, %k1 -; VLBW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; VLBW-NEXT: retq -; -; VLNOBW-LABEL: test_16f32tosb: -; VLNOBW: # %bb.0: -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k0 -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k2 -; VLNOBW-NEXT: kxorw %k0, %k2, %k0 -; VLNOBW-NEXT: kshiftlw $15, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k0 -; VLNOBW-NEXT: kxorw %k1, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $2, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $13, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $3, %k0, %k1 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $12, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $4, %k0, %k1 -; VLNOBW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $11, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $5, %k0, %k1 -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm3, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $10, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $6, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm3, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $9, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $7, %k0, %k1 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $8, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $8, %k0, %k1 -; VLNOBW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: 
kshiftrw $7, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $9, %k0, %k1 -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm3, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $6, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $10, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm3, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $5, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $11, %k0, %k1 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $4, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $12, %k0, %k1 -; VLNOBW-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $3, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $13, %k0, %k1 -; VLNOBW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $2, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftrw $14, %k0, %k1 -; VLNOBW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; VLNOBW-NEXT: vcvttss2si %xmm2, %eax -; VLNOBW-NEXT: kmovw %eax, %k2 -; VLNOBW-NEXT: kxorw %k2, %k1, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: kshiftrw $1, %k1, %k1 -; VLNOBW-NEXT: kxorw %k0, %k1, %k0 -; VLNOBW-NEXT: kshiftlw $1, %k0, %k0 -; VLNOBW-NEXT: kshiftrw $1, %k0, %k0 -; VLNOBW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; VLNOBW-NEXT: vcvttss2si %xmm0, %eax -; VLNOBW-NEXT: kmovw %eax, %k1 -; VLNOBW-NEXT: kshiftlw $15, %k1, %k1 -; VLNOBW-NEXT: korw %k1, %k0, %k1 -; VLNOBW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; VLNOBW-NEXT: retq -; -; AVX512DQ-LABEL: test_16f32tosb: -; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k0 -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftrw $1, %k1, %k2 -; AVX512DQ-NEXT: kxorw %k0, %k2, %k0 -; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 -; AVX512DQ-NEXT: kxorw %k1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $2, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $13, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $3, %k0, %k1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $12, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $4, %k0, %k1 -; AVX512DQ-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw 
%eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $11, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $5, %k0, %k1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm3, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $6, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm3, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $7, %k0, %k1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $8, %k0, %k1 -; AVX512DQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $9, %k0, %k1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm3, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $10, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm3, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $11, %k0, %k1 -; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $12, %k0, %k1 -; AVX512DQ-NEXT: vextractf32x4 $3, %zmm0, %xmm0 -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $13, %k0, %k1 -; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftrw $14, %k0, %k1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttss2si %xmm2, %eax -; AVX512DQ-NEXT: kmovw %eax, %k2 -; AVX512DQ-NEXT: kxorw %k2, %k1, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1 -; AVX512DQ-NEXT: kxorw %k0, %k1, %k0 -; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 -; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: vpermilps 
{{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax -; AVX512DQ-NEXT: kmovw %eax, %k1 -; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 -; AVX512DQ-NEXT: korw %k1, %k0, %k1 -; AVX512DQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; AVX512DQ-NEXT: retq -; -; AVX512BW-LABEL: test_16f32tosb: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k0 -; AVX512BW-NEXT: vcvttss2si %xmm0, %eax -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kxorw %k0, %k2, %k0 -; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $3, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $12, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $4, %k0, %k1 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $11, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $5, %k0, %k1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm3, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $10, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $6, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm3, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $9, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $7, %k0, %k1 -; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $8, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $8, %k0, %k1 -; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm2 -; AVX512BW-NEXT: vcvttss2si %xmm2, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $7, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $9, %k0, %k1 -; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] -; AVX512BW-NEXT: vcvttss2si %xmm3, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $6, %k1, %k1 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $10, %k0, %k1 -; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm2[1,0] -; AVX512BW-NEXT: vcvttss2si %xmm3, %eax -; AVX512BW-NEXT: kmovd %eax, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; 
AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftrw $5, %k1, %k1
-; AVX512BW-NEXT: kxorw %k0, %k1, %k0
-; AVX512BW-NEXT: kshiftrw $11, %k0, %k1
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512BW-NEXT: vcvttss2si %xmm2, %eax
-; AVX512BW-NEXT: kmovd %eax, %k2
-; AVX512BW-NEXT: kxorw %k2, %k1, %k1
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftrw $4, %k1, %k1
-; AVX512BW-NEXT: kxorw %k0, %k1, %k0
-; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; AVX512BW-NEXT: vcvttss2si %xmm0, %eax
-; AVX512BW-NEXT: kmovd %eax, %k2
-; AVX512BW-NEXT: kxorw %k2, %k1, %k1
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftrw $3, %k1, %k1
-; AVX512BW-NEXT: kxorw %k0, %k1, %k0
-; AVX512BW-NEXT: kshiftrw $13, %k0, %k1
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; AVX512BW-NEXT: vcvttss2si %xmm2, %eax
-; AVX512BW-NEXT: kmovd %eax, %k2
-; AVX512BW-NEXT: kxorw %k2, %k1, %k1
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftrw $2, %k1, %k1
-; AVX512BW-NEXT: kxorw %k0, %k1, %k0
-; AVX512BW-NEXT: kshiftrw $14, %k0, %k1
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
-; AVX512BW-NEXT: vcvttss2si %xmm2, %eax
-; AVX512BW-NEXT: kmovd %eax, %k2
-; AVX512BW-NEXT: kxorw %k2, %k1, %k1
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: kshiftrw $1, %k1, %k1
-; AVX512BW-NEXT: kxorw %k0, %k1, %k0
-; AVX512BW-NEXT: kshiftlw $1, %k0, %k0
-; AVX512BW-NEXT: kshiftrw $1, %k0, %k0
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512BW-NEXT: vcvttss2si %xmm0, %eax
-; AVX512BW-NEXT: kmovd %eax, %k1
-; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
-; AVX512BW-NEXT: korw %k1, %k0, %k1
-; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
-; AVX512BW-NEXT: retq
+; ALL-LABEL: test_16f32tosb:
+; ALL: # %bb.0:
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
+; ALL-NEXT: vptestmd %zmm0, %zmm0, %k1
+; ALL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
+; ALL-NEXT: retq
 %mask = fptosi <16 x float> %a to <16 x i1>
 %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
 ret <16 x i32> %select
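
The updated expectations can be exercised in isolation with a reduced test along the lines of the sketch below. This is only an illustrative sketch: the RUN line's triple and -mattr flags are assumptions (avx512-cvt.ll's actual RUN lines are not part of this hunk), while the IR body and the expected instruction sequence are taken from the new ALL checks above.

; Hypothetical reduced test; the -mtriple/-mattr values are assumed,
; not copied from avx512-cvt.ll's RUN lines.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s

define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) {
; CHECK-LABEL: test_16f32tosb:
; CHECK: vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT: vptestmd %zmm0, %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z}
  %mask = fptosi <16 x float> %a to <16 x i1>
  %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer
  ret <16 x i32> %select
}

With the vXi1 result promoted to vXi32, the whole <16 x i1> fptosi now compiles to a single packed vcvttps2dq feeding a vptestmd mask generation, instead of the sixteen scalar vcvttss2si conversions plus kshift/kxor mask arithmetic visible in the removed check blocks.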

