summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2020-01-03 10:56:09 -0800
committerCraig Topper <craig.topper@intel.com>2020-01-03 11:39:08 -0800
commit2875cc6b290cef6ec355a896f1d03ddfaa05e1f4 (patch)
tree3e634600aab8fef4009261cd448db596c3c97f54 /llvm/lib/Target
parentcf48101200ee192dd82e6ed0512ae42e7b3162a9 (diff)
downloadbcm5719-llvm-2875cc6b290cef6ec355a896f1d03ddfaa05e1f4.tar.gz
bcm5719-llvm-2875cc6b290cef6ec355a896f1d03ddfaa05e1f4.zip
[X86] Improve lowering for v2i32->v2f64 uint_to_fp
This uses an alternative implementation of this conversion derived from our v2i32->v2f32 handling. We can zero extend the v2i32 to v2i64, then OR it with the bit representation of 2.0^52, which will give us 2.0^52 plus the 32-bit integer since double's mantissa is 52 bits. Then we just need to subtract 2.0^52 as a double and let the floating point unit normalize the remaining bits into a valid double. This is fewer instructions than our previous code, but does require a port 5 shuffle for the zero extend or unpack. Differential Revision: https://reviews.llvm.org/D71945
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp50
1 files changed, 14 insertions, 36 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 492c7d4894b..3d20d91a6b0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18981,43 +18981,21 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
}
- // Legalize to v4i32 type.
- N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
- DAG.getUNDEF(MVT::v2i32));
+ // Zero extend to v2i64, OR with the floating point representation of 2^52.
+ // This gives us the floating point equivalent of 2^52 + the i32 integer
+ // since double has 52-bits of mantissa. Then subtract 2^52 in floating
+ // point leaving just our i32 integers in double format.
+ SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0);
+ SDValue VBias =
+ DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64);
+ SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn,
+ DAG.getBitcast(MVT::v2i64, VBias));
+ Or = DAG.getBitcast(MVT::v2f64, Or);
- // Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT,
- // but using v2i32 to v2f64 with X86ISD::CVTSI2P.
- SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32);
- SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32);
-
- // Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP((double)(1 << 16), DL, MVT::v2f64);
-
- // Clear upper part of LO, lower HI.
- SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord);
- SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask);
-
- if (IsStrict) {
- SDValue fHI = DAG.getNode(X86ISD::STRICT_CVTSI2P, DL,
- {MVT::v2f64, MVT::Other}, {Op.getOperand(0), HI});
- fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {MVT::v2f64, MVT::Other},
- {fHI.getValue(1), fHI, TWOHW});
- SDValue fLO = DAG.getNode(X86ISD::STRICT_CVTSI2P, DL,
- {MVT::v2f64, MVT::Other}, {Op.getOperand(0), LO});
- SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- fHI.getValue(1), fLO.getValue(1));
-
- // Add the two halves
- return DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v2f64, MVT::Other},
- {Chain, fHI, fLO});
- }
-
- SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI);
- fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW);
- SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO);
-
- // Add the two halves.
- return DAG.getNode(ISD::FADD, DL, MVT::v2f64, fHI, fLO);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v2f64, MVT::Other},
+ {Op.getOperand(0), Or, VBias});
+ return DAG.getNode(ISD::FSUB, DL, MVT::v2f64, Or, VBias);
}
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
OpenPOWER on IntegriCloud