summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorJan Vesely <jan.vesely@rutgers.edu>2015-01-22 23:42:43 +0000
committerJan Vesely <jan.vesely@rutgers.edu>2015-01-22 23:42:43 +0000
commit5f715d36a7e04c11e1ce9b6aeb723d3b75053c40 (patch)
tree4f696c2c29c435a85c924d8553a66abd1fe9ddfb /llvm/lib
parent6269e3ca2f2d5cf7619a9be1cb49e317987bef78 (diff)
downloadbcm5719-llvm-5f715d36a7e04c11e1ce9b6aeb723d3b75053c40.tar.gz
bcm5719-llvm-5f715d36a7e04c11e1ce9b6aeb723d3b75053c40.zip
R600: Try to use lower types for 64bit division if possible
v2: add and enable tests for SI Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu> Reviewed-by: Matt Arsenault <Matthew.Arsenault@amd.com> llvm-svn: 226881
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/R600/AMDGPUISelLowering.cpp50
-rw-r--r--llvm/lib/Target/R600/AMDGPUISelLowering.h2
2 files changed, 39 insertions, 13 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index 95ef963fb21..e169f912624 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -1661,6 +1661,20 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
+ if (VT == MVT::i64 &&
+ DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
+ DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
+
+ SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
+ LHS_Lo, RHS_Lo);
+
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(0), zero);
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, Res.getValue(1), zero);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+ return;
+ }
+
// Get Speculative values
SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
@@ -1722,8 +1736,8 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SDValue Den = Op.getOperand(1);
if (VT == MVT::i32) {
- if (DAG.MaskedValueIsZero(Op.getOperand(0), APInt(32, 0xff << 24)) &&
- DAG.MaskedValueIsZero(Op.getOperand(1), APInt(32, 0xff << 24))) {
+ if (DAG.MaskedValueIsZero(Num, APInt::getHighBitsSet(32, 8)) &&
+ DAG.MaskedValueIsZero(Den, APInt::getHighBitsSet(32, 8))) {
// TODO: We technically could do this for i64, but shouldn't that just be
// handled by something generally reducing 64-bit division on 32-bit
// values to 32-bit?
@@ -1835,19 +1849,31 @@ SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- if (VT == MVT::i32) {
- if (DAG.ComputeNumSignBits(Op.getOperand(0)) > 8 &&
- DAG.ComputeNumSignBits(Op.getOperand(1)) > 8) {
- // TODO: We technically could do this for i64, but shouldn't that just be
- // handled by something generally reducing 64-bit division on 32-bit
- // values to 32-bit?
- return LowerDIVREM24(Op, DAG, true);
- }
- }
-
SDValue Zero = DAG.getConstant(0, VT);
SDValue NegOne = DAG.getConstant(-1, VT);
+ if (VT == MVT::i32 &&
+ DAG.ComputeNumSignBits(LHS) > 8 &&
+ DAG.ComputeNumSignBits(RHS) > 8) {
+ return LowerDIVREM24(Op, DAG, true);
+ }
+ if (VT == MVT::i64 &&
+ DAG.ComputeNumSignBits(LHS) > 32 &&
+ DAG.ComputeNumSignBits(RHS) > 32) {
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ //HiLo split
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
+ SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
+ LHS_Lo, RHS_Lo);
+ SDValue Res[2] = {
+ DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
+ DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
+ };
+ return DAG.getMergeValues(Res, DL);
+ }
+
SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h
index 87c40d09e8b..387a58e1e8c 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -43,7 +43,6 @@ private:
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
- SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
@@ -90,6 +89,7 @@ protected:
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;
OpenPOWER on IntegriCloud