author     Craig Topper <craig.topper@gmail.com>   2019-12-26 21:46:29 -0800
committer  Craig Topper <craig.topper@gmail.com>   2019-12-26 22:04:40 -0800
commit     ecbaf152f8fe2b5b8ebc633541e15158984f4db4 (patch)
tree       77453b8078ab9a82ff116981285a3f076ddd6b1d
parent     50fb3957c1906d2e971a192e5bd69b40bc5a3919 (diff)
download   bcm5719-llvm-ecbaf152f8fe2b5b8ebc633541e15158984f4db4.tar.gz
           bcm5719-llvm-ecbaf152f8fe2b5b8ebc633541e15158984f4db4.zip
[X86] Custom widen 128/256-bit vXi32 fp_to_uint on avx512f targets without avx512vl. Similar for vXi64 on avx512dq without avx512vl.
Summary:
Previously we did this with isel patterns that used garbage in the
widened part of the source. But that's not valid for strictfp. So now
we custom widen and use zeroes for the widened elements for strictfp.

This replaces D71864.

Reviewers: RKSimon, spatel, andrew.w.kaylor, pengfei, LiuChen3

Reviewed By: pengfei

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71879
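For reference, a minimal, hypothetical IR example of the kind of strict-FP conversion this lowering change targets. It is modelled on the patterns exercised by the updated tests in vec-strict-fptoint-128.ll, not copied from them, and the function name is illustrative:

; Illustrative only: a strict (constrained) v4f32 -> v4i32 unsigned conversion.
; On an AVX512F target without AVX512VL, the new custom lowering widens the
; source to 512 bits with zeroes (rather than undef/garbage) before converting,
; so the widened lanes cannot raise spurious FP exceptions.
define <4 x i32> @strict_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
  %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %a, metadata !"fpexcept.strict")
  ret <4 x i32> %ret
}

declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)

attributes #0 = { strictfp }

Compiled with an avx512f-only attribute set, llc is expected to zero the upper lanes of the xmm source with a vmovaps and then convert with vcvttps2udq on the zmm register, matching the updated AVX512F CHECK lines later in this diff.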
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp          | 106
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td            |  49
-rw-r--r--  llvm/lib/Target/X86/X86TargetTransformInfo.cpp   |   1
-rw-r--r--  llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll  |  12
-rw-r--r--  llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll  |  24
5 files changed, 104 insertions(+), 88 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e1236714a33..b62910fd5d7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1631,13 +1631,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
// These operations are handled on non-VLX by artificially widening in
// isel patterns.
- // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
- setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
+ Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Legal);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
+ Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
Subtarget.hasVLX() ? Legal : Custom);
@@ -1679,10 +1682,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
Subtarget.hasVLX() ? Legal : Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Legal);
- setOperationAction(ISD::FP_TO_UINT, VT, Legal);
- setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
- setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
+ setOperationAction(ISD::FP_TO_SINT, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
+ Subtarget.hasVLX() ? Legal : Custom);
setOperationAction(ISD::MUL, VT, Legal);
}
}
@@ -19919,7 +19926,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
- MVT VT = Op.getSimpleValueType();
+ MVT VT = Op->getSimpleValueType(0);
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
@@ -19935,13 +19942,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
if (!IsSigned && !Subtarget.hasVLX()) {
+ assert(Subtarget.useAVX512Regs() && "Unexpected features!");
// Widen to 512-bits.
ResVT = MVT::v8i32;
TruncVT = MVT::v8i1;
- if (IsStrict)
- Opc = IsSigned ? ISD::STRICT_FP_TO_SINT : ISD::STRICT_FP_TO_UINT;
- else
- Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
+ Opc = Op.getOpcode();
// Need to concat with zero vector for strict fp to avoid spurious
// exceptions.
// TODO: Should we just do this for non-strict as well?
@@ -19967,8 +19972,79 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
- assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
+ // v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.
+ if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
+ assert(!IsSigned && "Expected unsigned conversion!");
+ assert(Subtarget.useAVX512Regs() && "Requires avx512f");
+ return Op;
+ }
+
+ // Widen vXi32 fp_to_uint with avx512f to 512-bit source.
+ if ((VT == MVT::v4i32 || VT == MVT::v8i32) &&
+ (SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32)) {
+ assert(!IsSigned && "Expected unsigned conversion!");
+ assert(Subtarget.useAVX512Regs() && !Subtarget.hasVLX() &&
+ "Unexpected features!");
+ MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
+ MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
+ // Need to concat with zero vector for strict fp to avoid spurious
+ // exceptions.
+ // TODO: Should we just do this for non-strict as well?
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
+ DAG.getIntPtrConstant(0, dl));
+
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
+ {Op->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
+ // Widen vXi64 fp_to_uint/fp_to_sint with avx512dq to 512-bit source.
+ if ((VT == MVT::v2i64 || VT == MVT::v4i64) &&
+ (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32)) {
+ assert(Subtarget.useAVX512Regs() && Subtarget.hasDQI() &&
+ !Subtarget.hasVLX() && "Unexpected features!");
+ MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
+ // Need to concat with zero vector for strict fp to avoid spurious
+ // exceptions.
+ // TODO: Should we just do this for non-strict as well?
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
+ Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
+ DAG.getIntPtrConstant(0, dl));
+
+ SDValue Res, Chain;
+ if (IsStrict) {
+ Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
+ {Op->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
+ }
+
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
+ assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL");
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
if (IsStrict) {
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 09ac2ff3017..02ac454fe06 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8367,23 +8367,6 @@ let Predicates = [HasDQI, HasVLX] in {
(VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
}
-let Predicates = [HasAVX512, NoVLX] in {
-def : Pat<(v8i32 (X86any_cvttp2ui (v8f32 VR256X:$src1))),
- (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v4i32 (X86any_cvttp2ui (v4f32 VR128X:$src1))),
- (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
- (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4i32 (X86any_cvttp2ui (v4f64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_xmm)>;
-}
-
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
(VCVTDQ2PDZ128rm addr:$src)>;
@@ -8468,38 +8451,6 @@ let Predicates = [HasDQI, HasVLX] in {
(VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
}
-let Predicates = [HasDQI, NoVLX] in {
-def : Pat<(v2i64 (X86any_cvttp2si (v2f64 VR128X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4i64 (X86any_cvttp2si (v4f32 VR128X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
- (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_ymm)>;
-
-def : Pat<(v4i64 (X86any_cvttp2si (v4f64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
-def : Pat<(v2i64 (X86any_cvttp2ui (v2f64 VR128X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_xmm)>;
-
-def : Pat<(v4i64 (X86any_cvttp2ui (v4f32 VR128X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
- (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
- VR128X:$src1, sub_xmm)))), sub_ymm)>;
-
-def : Pat<(v4i64 (X86any_cvttp2ui (v4f64 VR256X:$src1))),
- (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
- (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
- VR256X:$src1, sub_ymm)))), sub_ymm)>;
-}
-
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 0c31c31f6ff..22c9a4675f7 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1398,6 +1398,7 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 },
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 2 },
{ ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 2 },
{ ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 },
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
index b8a863643c8..603e2165e62 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
@@ -190,10 +190,9 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
@@ -501,10 +500,9 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
@@ -2662,10 +2660,9 @@ define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; AVX-64-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
; AVX-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vmovaps %xmm0, %xmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
@@ -2676,10 +2673,9 @@ define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
index d20007c0054..2b070ed3df9 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
@@ -197,10 +197,9 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptosi_v4f64_to_v4i64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
@@ -569,10 +568,9 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
@@ -746,10 +744,9 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
@@ -1118,10 +1115,9 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
@@ -1195,10 +1191,9 @@ define <4 x i32> @strict_vector_fptoui_v4f64_to_v4i32(<4 x double> %a) #0 {
; AVX-64-NEXT: vzeroupper
; AVX-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
@@ -1210,10 +1205,9 @@ define <4 x i32> @strict_vector_fptoui_v4f64_to_v4i32(<4 x double> %a) #0 {
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptoui_v4f64_to_v4i32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT: vzeroupper
@@ -1472,10 +1466,9 @@ define <8 x i32> @strict_vector_fptoui_v8f32_to_v8i32(<8 x float> %a) #0 {
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512F-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vmovaps %ymm0, %ymm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: ret{{[l|q]}}
@@ -1485,10 +1478,9 @@ define <8 x i32> @strict_vector_fptoui_v8f32_to_v8i32(<8 x float> %a) #0 {
; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
-; FIXME: This is an unsafe behavior for strict FP
; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: ret{{[l|q]}}