diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-11-16 22:53:00 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-11-16 22:53:00 +0000 |
| commit | ee0333b4a9d5baf3165f09cd6fb4b520fae620e6 (patch) | |
| tree | 7aec9c72e73a4c0b98a5628bf0e66b17519e281f /llvm/lib/Target/X86/X86ISelLowering.cpp | |
| parent | ac35cd330ac06cce6a158e102fc9265f97476a7b (diff) | |
| download | bcm5719-llvm-ee0333b4a9d5baf3165f09cd6fb4b520fae620e6.tar.gz bcm5719-llvm-ee0333b4a9d5baf3165f09cd6fb4b520fae620e6.zip | |
[X86] Add custom promotion of narrow fp_to_uint/fp_to_sint operations under -x86-experimental-vector-widening-legalization.
This tries to force the result type to vXi32 followed by a truncate. This can help avoid scalarization that would otherwise occur.
There are some annoying examples of an AVX512 truncate instruction followed by a packus where we should really be able to use just one truncate, but overall this is still a net improvement.
llvm-svn: 347105
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 51 |
1 file changed, 48 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5c07d7d906f..8f422ce524b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -899,10 +899,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); - // Custom legalize these to avoid over promotion. + + // Custom legalize these to avoid over promotion or custom promotion. setOperationAction(ISD::FP_TO_SINT, MVT::v2i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i8, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i8, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v2i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i8, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); @@ -26287,7 +26295,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, // Promote these manually to avoid over promotion to v2i64. Type // legalization will revisit the v2i32 operation for more cleanup. if ((VT == MVT::v2i8 || VT == MVT::v2i16) && - getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) { + getTypeAction(*DAG.getContext(), VT) == TypePromoteInteger) { // AVX512DQ provides instructions that produce a v2i64 result. 
if (Subtarget.hasDQI()) return; @@ -26302,6 +26310,43 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } + if (VT.isVector() && VT.getScalarSizeInBits() < 32) { + if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) + return; + + // Try to create a 128 bit vector, but don't exceed a 32 bit element. + unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U); + MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth), + VT.getVectorNumElements()); + unsigned Opc = N->getOpcode(); + if (PromoteVT == MVT::v2i32 || PromoteVT == MVT::v4i32) + Opc = ISD::FP_TO_SINT; + + SDValue Res = DAG.getNode(Opc, dl, PromoteVT, Src); + + // Preserve what we know about the size of the original result. Except + // when the result is v2i32 since we can't widen the assert. + if (PromoteVT != MVT::v2i32) + Res = DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext + : ISD::AssertSext, + dl, PromoteVT, Res, + DAG.getValueType(VT.getVectorElementType())); + + // Truncate back to the original width. + Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); + + // Now widen to 128 bits. + unsigned NumConcats = 128 / VT.getSizeInBits(); + MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(), + VT.getVectorNumElements() * NumConcats); + SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); + ConcatOps[0] = Res; + Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps); + Results.push_back(Res); + return; + } + + if (VT == MVT::v2i32) { assert((IsSigned || Subtarget.hasAVX512()) && "Can only handle signed conversion without AVX512"); |

