diff options
author | Liu, Chen3 <chen3.liu@intel.com> | 2019-12-26 17:47:37 +0800 |
---|---|---|
committer | Liu, Chen3 <chen3.liu@intel.com> | 2019-12-27 08:28:33 +0800 |
commit | 1a7b69f5dd32980a7e0b0841a99dc65b2b887203 (patch) | |
tree | 55f9667a097e1b7980b836c657d58b41ed392afa /llvm/lib/Target | |
parent | 78f714f824fac8aa3fdd85908c41538bccefb959 (diff) | |
download | bcm5719-llvm-1a7b69f5dd32980a7e0b0841a99dc65b2b887203.tar.gz bcm5719-llvm-1a7b69f5dd32980a7e0b0841a99dc65b2b887203.zip |
add custom operation for strict fpextend/fpround
Differential Revision: https://reviews.llvm.org/D71892
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 32 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 21 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 12 |
5 files changed, 57 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index dd73f8a7068..94e43393eab 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1004,7 +1004,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom); // We want to legalize this to an f64 load rather than an i64 load on // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for @@ -20080,12 +20082,13 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { } assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); - // FIXME: Strict fp. - assert(!IsStrict && "Strict FP not supported yet!"); - return DAG.getNode(X86ISD::VFPEXT, DL, VT, - DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, - In, DAG.getUNDEF(SVT))); + SDValue Res = + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT)); + if (IsStrict) + return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other}, + {Op->getOperand(0), Res}); + return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res); } SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { @@ -28938,11 +28941,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); return; } + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: { - if (!isTypeLegal(N->getOperand(0).getValueType())) - return; - SDValue V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Src = N->getOperand(IsStrict ? 1 : 0); + if (!isTypeLegal(Src.getValueType())) + return; + SDValue V; + if (IsStrict) + V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other}, + {N->getOperand(0), N->getOperand(1)}); + else + V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); Results.push_back(V); + if (IsStrict) + Results.push_back(V.getValue(1)); return; } case ISD::FP_EXTEND: { @@ -29380,10 +29393,12 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES"; case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; + case X86ISD::STRICT_VFPEXT: return "X86ISD::STRICT_VFPEXT"; case X86ISD::VFPEXT_SAE: return "X86ISD::VFPEXT_SAE"; case X86ISD::VFPEXTS: return "X86ISD::VFPEXTS"; case X86ISD::VFPEXTS_SAE: return "X86ISD::VFPEXTS_SAE"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; + case X86ISD::STRICT_VFPROUND: return "X86ISD::STRICT_VFPROUND"; case X86ISD::VMFPROUND: return "X86ISD::VMFPROUND"; case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND"; case X86ISD::VFPROUNDS: return "X86ISD::VFPROUNDS"; @@ -34983,6 +34998,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, case X86ISD::STRICT_CVTTP2UI: case X86ISD::STRICT_CVTSI2P: case X86ISD::STRICT_CVTUI2P: + case X86ISD::STRICT_VFPROUND: if (In.getOperand(1).getValueType() == MVT::v2f64 || In.getOperand(1).getValueType() == MVT::v2i64) return N->getOperand(0); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 18af57156a3..2396cf65fd8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -296,10 +296,10 @@ namespace llvm { VMTRUNC, VMTRUNCUS, VMTRUNCS, // Vector FP extend. - VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE, + VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE, STRICT_VFPEXT, // Vector FP round. - VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND, + VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND, STRICT_VFPROUND, // Masked version of above. Used for v2f64->v4f32. // SRC, PASSTHRU, MASK diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 36f7980a08f..09ac2ff3017 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7589,7 +7589,7 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info, - X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128; + X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128; defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend, sched.YMM>, EVEX_V256; } @@ -7719,7 +7719,7 @@ let Predicates = [HasVLX] in { // Special patterns to allow use of X86vmfpround for masking. Instruction // patterns have been disabled with null_frag. - def : Pat<(X86vfpround (v2f64 VR128X:$src)), + def : Pat<(X86any_vfpround (v2f64 VR128X:$src)), (VCVTPD2PSZ128rr VR128X:$src)>; def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0), VK2WM:$mask), @@ -7728,7 +7728,7 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; - def : Pat<(X86vfpround (loadv2f64 addr:$src)), + def : Pat<(X86any_vfpround (loadv2f64 addr:$src)), (VCVTPD2PSZ128rm addr:$src)>; def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0), VK2WM:$mask), @@ -7737,7 +7737,7 @@ let Predicates = [HasVLX] in { VK2WM:$mask), (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(X86vfpround (v2f64 (X86VBroadcastld64 addr:$src))), + def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))), (VCVTPD2PSZ128rmb addr:$src)>; def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index a04c493675a..37cba895c37 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -127,11 +127,32 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>, SDTCVecEltisVT<1, f32>, SDTCisSameSizeAs<0, 1>]>>; + +def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT", + SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>, + SDTCVecEltisVT<1, f32>, + SDTCisSameSizeAs<0, 1>]>, + [SDNPHasChain]>; + +def X86any_vfpext : PatFrags<(ops node:$src), + [(X86vfpext node:$src), + (X86strict_vfpext node:$src)]>; + def X86vfpround: SDNode<"X86ISD::VFPROUND", SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, f64>, SDTCisOpSmallerThanOp<0, 1>]>>; +def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND", + SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, f64>, + SDTCisOpSmallerThanOp<0, 1>]>, + [SDNPHasChain]>; + +def X86any_vfpround : PatFrags<(ops node:$src), + [(X86vfpround node:$src), + (X86strict_vfpround node:$src)]>; + def X86frounds : SDNode<"X86ISD::VFPROUNDS", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>, SDTCisSameAs<0, 1>, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index c218acc6e36..c7ecfba5b24 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1617,7 +1617,7 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, + [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>, PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", @@ -1636,7 +1636,7 @@ def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))]>, + [(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>, PS, Sched<[WriteCvtPS2PD]>; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", @@ -1708,11 +1708,11 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in { // XMM only def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>, + [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>, VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG; def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps{x}\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))]>, + [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>, VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG; def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), @@ -1732,11 +1732,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}", def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))]>, + [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>, Sched<[WriteCvtPD2PS]>, SIMD_EXC; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))]>, + [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>, Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC; let Predicates = [HasAVX, NoVLX] in { |