diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-10-25 11:15:57 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-10-25 11:15:57 +0000 |
commit | 838eb24014f3493cd3a96e3ca759151cd4485601 (patch) | |
tree | 8f365e05abfcddfa6bf217a603e57bd0f1f7543e /llvm/lib/CodeGen/SelectionDAG | |
parent | 6ca3609418db6f6cfabd1e89b50448cc26dbae5c (diff) | |
download | bcm5719-llvm-838eb24014f3493cd3a96e3ca759151cd4485601.tar.gz bcm5719-llvm-838eb24014f3493cd3a96e3ca759151cd4485601.zip |
[TargetLowering] Improve vXi64 UINT_TO_FP vXf64 support (P38226)
As suggested on D52965, this patch moves the i64 to f64 UINT_TO_FP expansion code from LegalizeDAG into TargetLowering and makes it available to LegalizeVectorOps as well.
Not only does this help perform X86 lowering as a true vectorization instead of (partially vectorized) scalar conversions, it also avoids the HADDPD op from the scalar code, which can be slow on most targets.
AVX512F does have the vcvtusi2sdq scalar operation, but we don't unroll to use it as it seems to only help for the v2f64 case - otherwise the unrolling cost will certainly be too high. My feeling is that we should leave it to the vectorizers - and if they generate the vector UINT_TO_FP we should use it.
Differential Revision: https://reviews.llvm.org/D53649
llvm-svn: 345256
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 31 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 42 |
3 files changed, 53 insertions, 25 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f6a6e064fa4..413a53d2e6e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2369,30 +2369,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. - // Implementation of unsigned i64 to f64 following the algorithm in - // __floatundidf in compiler_rt. This implementation has the advantage - // of performing rounding correctly, both in the default rounding mode - // and in all alternate rounding modes. - // TODO: Generalize this for use with other types. - if (SrcVT == MVT::i64 && DestVT == MVT::f64) { - LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); - SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); - SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( - BitsToDouble(UINT64_C(0x4530000000100000)), dl, DestVT); - SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); - SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); - SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); - - SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Op0, LoMask); - SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Op0, HiShift); - SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); - SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); - SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, DestVT, LoOr); - SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, DestVT, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DestVT, HiFlt, TwoP84PlusTwoP52); - return DAG.getNode(ISD::FADD, dl, DestVT, LoFlt, HiSub); - } - // TODO: Generalize this for use with other types. 
if (SrcVT == MVT::i64 && DestVT == MVT::f32) { LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); @@ -2921,8 +2897,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { + Results.push_back(Tmp1); + break; + } + LLVM_FALLTHROUGH + case ISD::SINT_TO_FP: Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 850cdcd1701..6554d5a27b2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1022,6 +1022,11 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { EVT VT = Op.getOperand(0).getValueType(); SDLoc DL(Op); + // Attempt to expand using TargetLowering. + SDValue Result; + if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG)) + return Result; + // Make sure that the SINT_TO_FP and SRL instructions are available. 
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 0189a11fa1d..1a29cb7ebf7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4137,6 +4137,48 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, return true; } +bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + SDValue Src = Node->getOperand(0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); + + if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64) + return false; + + // Only expand vector types if we have the appropriate vector bit operations. + if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || + !isOperationLegalOrCustom(ISD::FADD, DstVT) || + !isOperationLegalOrCustom(ISD::FSUB, DstVT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) + return false; + + SDLoc dl(SDValue(Node, 0)); + EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); + + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. 
+ SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); + SDValue TwoP84PlusTwoP52 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); + SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); + SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); + SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); + + SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); + SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); + SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); + SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); + SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + return true; +} + SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); |