diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 123 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 45 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll | 16 |
5 files changed, 124 insertions, 90 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 54e0a02be35..38d6def4ca5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1637,10 +1637,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, + Subtarget.hasVLX() ? Legal : Custom); for (auto VT : { MVT::v2i64, MVT::v4i64 }) { setOperationAction(ISD::SMAX, VT, Legal); @@ -1665,10 +1669,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget.hasDQI()) { for (auto VT : { MVT::v2i64, MVT::v4i64 }) { - setOperationAction(ISD::SINT_TO_FP, VT, Legal); - setOperationAction(ISD::UINT_TO_FP, VT, Legal); - setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); - setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); + setOperationAction(ISD::SINT_TO_FP, VT, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::UINT_TO_FP, VT, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::STRICT_SINT_TO_FP, VT, + Subtarget.hasVLX() ? Legal : Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, VT, + Subtarget.hasVLX() ? Legal : Custom); setOperationAction(ISD::FP_TO_SINT, VT, Legal); setOperationAction(ISD::FP_TO_UINT, VT, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); @@ -18570,6 +18578,46 @@ static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG, DAG.getIntPtrConstant(0, DL)); } +static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + assert(Subtarget.hasDQI() && !Subtarget.hasVLX() && "Unexpected features"); + + SDLoc DL(Op); + bool IsStrict = Op->isStrictFPOpcode(); + MVT VT = Op->getSimpleValueType(0); + SDValue Src = Op->getOperand(IsStrict ? 1 : 0); + MVT SrcVT = Src.getSimpleValueType(); + assert((SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64) && + "Unsupported custom type"); + + // With AVX512DQ, but not VLX we need to widen to get a 512-bit result type. + assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) && + "Unexpected VT!"); + MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64; + + // Need to concat with zero vector for strict fp to avoid spurious + // exceptions. + SDValue Tmp = + IsStrict ? DAG.getConstant(0, DL, MVT::v8i64) : DAG.getUNDEF(MVT::v8i64); + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src, + DAG.getIntPtrConstant(0, DL)); + SDValue Res, Chain; + if (IsStrict) { + Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other}, + {Op->getOperand(0), Src}); + Chain = Res.getValue(1); + } else { + Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src); + } + + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); + + if (IsStrict) + return DAG.getMergeValues({Res, Chain}, DL); + return Res; +} + SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { bool IsStrict = Op->isStrictFPOpcode(); @@ -18596,6 +18644,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, DAG.getUNDEF(SrcVT))); } + if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64) + return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget); + return SDValue(); } @@ -18936,6 +18987,49 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { + SDLoc DL(Op); + bool IsStrict = Op->isStrictFPOpcode(); + SDValue V = Op->getOperand(IsStrict ? 1 : 0); + MVT VecIntVT = V.getSimpleValueType(); + assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) && + "Unsupported custom type"); + + if (Subtarget.hasAVX512()) { + // With AVX512, but not VLX we need to widen to get a 512-bit result type. + assert(!Subtarget.hasVLX() && "Unexpected features"); + MVT VT = Op->getSimpleValueType(0); + + // v8i32->v8f64 is legal with AVX512 so just return it. + if (VT == MVT::v8f64) + return Op; + + assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) && + "Unexpected VT!"); + MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32; + MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32; + // Need to concat with zero vector for strict fp to avoid spurious + // exceptions. + SDValue Tmp = + IsStrict ? DAG.getConstant(0, DL, WideIntVT) : DAG.getUNDEF(WideIntVT); + V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideIntVT, Tmp, V, + DAG.getIntPtrConstant(0, DL)); + SDValue Res, Chain; + if (IsStrict) { + Res = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {WideVT, MVT::Other}, + {Op->getOperand(0), V}); + Chain = Res.getValue(1); + } else { + Res = DAG.getNode(ISD::UINT_TO_FP, DL, WideVT, V); + } + + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); + + if (IsStrict) + return DAG.getMergeValues({Res, Chain}, DL); + return Res; + } + // The algorithm is the following: // #ifdef __SSE4_1__ // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); @@ -18957,10 +19051,6 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, if (DAG.getTarget().Options.UnsafeFPMath) return SDValue(); - SDLoc DL(Op); - bool IsStrict = Op->isStrictFPOpcode(); - SDValue V = Op->getOperand(IsStrict ? 1 : 0); - MVT VecIntVT = V.getSimpleValueType(); bool Is128 = VecIntVT == MVT::v4i32; MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; // If we convert to something else than the supported type, e.g., to v4f64, @@ -18968,9 +19058,6 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, if (VecFloatVT != Op->getSimpleValueType(0)) return SDValue(); - assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) && - "Unsupported custom type"); - // In the #idef/#else code, we have in common: // - The vector of constants: // -- 0x4b000000 @@ -19051,8 +19138,10 @@ static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl); case MVT::v4i32: case MVT::v8i32: - assert(!Subtarget.hasAVX512()); return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget); + case MVT::v2i64: + case MVT::v4i64: + return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget); } } @@ -19064,7 +19153,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); MVT SrcVT = Src.getSimpleValueType(); - MVT DstVT = Op.getSimpleValueType(); + MVT DstVT = Op->getSimpleValueType(0); SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); if (DstVT == MVT::f128) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index a446afa9c9b..36f7980a08f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8382,21 +8382,6 @@ def : Pat<(v4i32 (X86any_cvttp2ui (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; - -def : Pat<(v8f32 (any_uint_to_fp (v8i32 VR256X:$src1))), - (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_ymm)>; - -def : Pat<(v4f32 (any_uint_to_fp (v4i32 VR128X:$src1))), - (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr - (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_xmm)>; - -def : Pat<(v4f64 (any_uint_to_fp (v4i32 VR128X:$src1))), - (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr - (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_ymm)>; } let Predicates = [HasVLX] in { @@ -8513,36 +8498,6 @@ def : Pat<(v4i64 (X86any_cvttp2ui (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; - -def : Pat<(v4f32 (any_sint_to_fp (v4i64 VR256X:$src1))), - (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr - (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_xmm)>; - -def : Pat<(v2f64 (any_sint_to_fp (v2i64 VR128X:$src1))), - (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr - (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_xmm)>; - -def : Pat<(v4f64 (any_sint_to_fp (v4i64 VR256X:$src1))), - (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr - (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_ymm)>; - -def : Pat<(v4f32 (any_uint_to_fp (v4i64 VR256X:$src1))), - (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr - (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_xmm)>; - -def : Pat<(v2f64 (any_uint_to_fp (v2i64 VR128X:$src1))), - (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr - (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), - VR128X:$src1, sub_xmm)))), sub_xmm)>; - -def : Pat<(v4f64 (any_uint_to_fp (v4i64 VR256X:$src1))), - (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr - (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), - VR256X:$src1, sub_ymm)))), sub_ymm)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll index 9f1a9241581..70b5d333ab8 100644 --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll @@ -537,10 +537,9 @@ define <4 x float> @uitofp_v4i32_v4f32(<4 x i32> %x) #0 { ; AVX1-64-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; AVX1-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512F-LABEL: uitofp_v4i32_v4f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vmovaps %xmm0, %xmm0 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512F-NEXT: vzeroupper @@ -551,10 +550,9 @@ define <4 x float> @uitofp_v4i32_v4f32(<4 x i32> %x) #0 { ; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: uitofp_v4i32_v4f32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper @@ -908,10 +906,9 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 { ; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: sitofp_v2i64_v2f64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper @@ -1014,10 +1011,9 @@ define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 { ; AVX512VL-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX512VL-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: uitofp_v2i64_v2f64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll index e46493d5c19..683e901ae54 100644 --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll @@ -163,10 +163,9 @@ define <8 x float> @uitofp_v8i32_v8f32(<8 x i32> %x) #0 { ; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: ret{{[l|q]}} ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512F-LABEL: uitofp_v8i32_v8f32: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vmovaps %ymm0, %ymm0 ; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: ret{{[l|q]}} @@ -176,10 +175,9 @@ define <8 x float> @uitofp_v8i32_v8f32(<8 x i32> %x) #0 { ; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: uitofp_v8i32_v8f32: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} @@ -330,10 +328,9 @@ define <4 x double> @uitofp_v4i32_v4f64(<4 x i32> %x) #0 { ; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: ret{{[l|q]}} ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512F-LABEL: uitofp_v4i32_v4f64: ; AVX512F: # %bb.0: -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512F-NEXT: vmovaps %xmm0, %xmm0 ; AVX512F-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512F-NEXT: ret{{[l|q]}} @@ -343,10 +340,9 @@ define <4 x double> @uitofp_v4i32_v4f64(<4 x i32> %x) #0 { ; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 ; AVX512VL-NEXT: ret{{[l|q]}} ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: uitofp_v4i32_v4f64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} @@ -412,10 +408,9 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 { ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: sitofp_v4i64_v4f64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} @@ -503,10 +498,9 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 { ; AVX512VL-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVX512VL-64-NEXT: retq ; -; FIXME: This is an unsafe behavior for strict FP ; AVX512DQ-LABEL: uitofp_v4i64_v4f64: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: ret{{[l|q]}} diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 75f16029e65..c80e1b281dd 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -6318,7 +6318,7 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 { ; ; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64: ; AVX512DQ: # %bb.0: # %entry -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper @@ -6617,7 +6617,7 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 { ; ; AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64: ; AVX512DQ: # %bb.0: # %entry -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: retq @@ -6687,7 +6687,7 @@ define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; ; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64: ; AVX512DQ: # %bb.0: # %entry -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512DQ-NEXT: vzeroupper @@ -6939,7 +6939,7 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 { ; ; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64: ; AVX512DQ: # %bb.0: # %entry -; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper @@ -7348,7 +7348,7 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 { ; ; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vmovaps %xmm0, %xmm0 ; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0 ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq @@ -7383,7 +7383,7 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 { ; ; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NEXT: vmovaps %xmm0, %xmm0 ; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper @@ -7448,7 +7448,7 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 { ; ; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64: ; AVX512DQ: # %bb.0: # %entry -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 ; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512DQ-NEXT: retq @@ -7605,7 +7605,7 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; ; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64: ; AVX512DQ: # %bb.0: # %entry -; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 ; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 ; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512DQ-NEXT: vzeroupper |

