diff options
author | Jan Vesely <jan.vesely@rutgers.edu> | 2015-01-22 23:42:43 +0000 |
---|---|---|
committer | Jan Vesely <jan.vesely@rutgers.edu> | 2015-01-22 23:42:43 +0000 |
commit | 5f715d36a7e04c11e1ce9b6aeb723d3b75053c40 (patch) | |
tree | 4f696c2c29c435a85c924d8553a66abd1fe9ddfb /llvm/lib | |
parent | 6269e3ca2f2d5cf7619a9be1cb49e317987bef78 (diff) | |
download | bcm5719-llvm-5f715d36a7e04c11e1ce9b6aeb723d3b75053c40.tar.gz bcm5719-llvm-5f715d36a7e04c11e1ce9b6aeb723d3b75053c40.zip |
R600: Try to use lower types for 64bit division if possible
v2: add and enable tests for SI
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Matt Arsenault <Matthew.Arsenault@amd.com>
llvm-svn: 226881
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 50 | ||||
-rw-r--r-- | llvm/lib/Target/R600/AMDGPUISelLowering.h | 2 |
2 files changed, 39 insertions, 13 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp index 95ef963fb21..e169f912624 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp @@ -1661,6 +1661,20 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero); SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one); + if (VT == MVT::i64 && + DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) && + DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) { + + SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), + LHS_Lo, RHS_Lo); + + SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero); + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero); + Results.push_back(DIV); + Results.push_back(REM); + return; + } + // Get Speculative values SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); @@ -1722,8 +1736,8 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, SDValue Den = Op.getOperand(1); if (VT == MVT::i32) { - if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) && - DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) { + if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) && + DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) { // TODO: We technically could do this for i64, but shouldn't that just be // handled by something generally reducing 64-bit division on 32-bit // values to 32-bit? @@ -1835,19 +1849,31 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); - if (VT == MVT::i32) { - if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 && - DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) { - // TODO: We technically could do this for i64, but shouldn't that just be - // handled by something generally reducing 64-bit division on 32-bit - // values to 32-bit? - return LowerDIVREM24(Op, DAG, true); - } - } - SDValue Zero = DAG.getConstant(0, VT); SDValue NegOne = DAG.getConstant(-1, VT); + if (VT == MVT::i32 && + DAG.ComputeNumSignBits(LHS) > 8 && + DAG.ComputeNumSignBits(RHS) > 8) { + return LowerDIVREM24(Op, DAG, true); + } + if (VT == MVT::i64 && + DAG.ComputeNumSignBits(LHS) > 32 && + DAG.ComputeNumSignBits(RHS) > 32) { + EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); + + //HiLo split + SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); + SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); + SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), + LHS_Lo, RHS_Lo); + SDValue Res[2] = { + DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)), + DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1)) + }; + return DAG.getMergeValues(Res, DL); + } + SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT); SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT); SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign); diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h index 87c40d09e8b..387a58e1e8c 100644 --- a/llvm/lib/Target/R600/AMDGPUISelLowering.h +++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h @@ -43,7 +43,6 @@ private: /// \brief Split a vector store into multiple scalar stores. /// \returns The resulting chain. - SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; @@ -90,6 +89,7 @@ protected: SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const; void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl<SDValue> &Results) const; |