diff options
author | Craig Topper <craig.topper@intel.com> | 2020-01-03 10:56:09 -0800 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2020-01-03 11:39:08 -0800 |
commit | 2875cc6b290cef6ec355a896f1d03ddfaa05e1f4 (patch) | |
tree | 3e634600aab8fef4009261cd448db596c3c97f54 /llvm/lib/Target | |
parent | cf48101200ee192dd82e6ed0512ae42e7b3162a9 (diff) | |
download | bcm5719-llvm-2875cc6b290cef6ec355a896f1d03ddfaa05e1f4.tar.gz bcm5719-llvm-2875cc6b290cef6ec355a896f1d03ddfaa05e1f4.zip |
[X86] Improve lowering for v2i32->v2f64 uint_to_fp
This uses an alternative implementation of this conversion derived
from our v2i32->v2f32 handling. We can zero extend the v2i32 to
v2i64 and OR it with the bit representation of 2.0^52, which gives
us 2.0^52 plus the 32-bit integer since a double's mantissa is 52 bits.
Then we just need to subtract 2.0^52 as a double and let the floating
point unit normalize the remaining bits into a valid double.
This is fewer instructions than our previous code, but does require
a port 5 shuffle for the zero extend or unpack.
Differential Revision: https://reviews.llvm.org/D71945
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 50 |
1 files changed, 14 insertions, 36 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 492c7d4894b..3d20d91a6b0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18981,43 +18981,21 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0); } - // Legalize to v4i32 type. - N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, - DAG.getUNDEF(MVT::v2i32)); + // Zero extend to 2i64, OR with the floating point representation of 2^52. + // This gives us the floating point equivalent of 2^52 + the i32 integer + // since double has 52-bits of mantissa. Then subtract 2^52 in floating + // point leaving just our i32 integers in double format. + SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0); + SDValue VBias = + DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64); + SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn, + DAG.getBitcast(MVT::v2i64, VBias)); + Or = DAG.getBitcast(MVT::v2f64, Or); - // Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT, - // but using v2i32 to v2f64 with X86ISD::CVTSI2P. - SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32); - SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32); - - // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP((double)(1 << 16), DL, MVT::v2f64); - - // Clear upper part of LO, lower HI. 
- SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord); - SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask); - - if (IsStrict) { - SDValue fHI = DAG.getNode(X86ISD::STRICT_CVTSI2P, DL, - {MVT::v2f64, MVT::Other}, {Op.getOperand(0), HI}); - fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {MVT::v2f64, MVT::Other}, - {fHI.getValue(1), fHI, TWOHW}); - SDValue fLO = DAG.getNode(X86ISD::STRICT_CVTSI2P, DL, - {MVT::v2f64, MVT::Other}, {Op.getOperand(0), LO}); - SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - fHI.getValue(1), fLO.getValue(1)); - - // Add the two halves - return DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v2f64, MVT::Other}, - {Chain, fHI, fLO}); - } - - SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI); - fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW); - SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO); - - // Add the two halves. - return DAG.getNode(ISD::FADD, DL, MVT::v2f64, fHI, fLO); + if (IsStrict) + return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v2f64, MVT::Other}, + {Op.getOperand(0), Or, VBias}); + return DAG.getNode(ISD::FSUB, DL, MVT::v2f64, Or, VBias); } static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, |