diff options
author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2018-10-25 21:47:22 +0000 |
---|---|---|
committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2018-10-25 21:47:22 +0000 |
commit | 4645711a8d54eb34d0040dde1748376d0992068b (patch) | |
tree | 86e3a0cb26d46634422198e718d9dc2cdddd3e65 /llvm/lib | |
parent | 2f9c42c99453b9f33d5f4311075c357134e84120 (diff) | |
download | bcm5719-llvm-4645711a8d54eb34d0040dde1748376d0992068b.tar.gz bcm5719-llvm-4645711a8d54eb34d0040dde1748376d0992068b.zip |
[SystemZ] Improve handling and cost estimates of vector integer div/rem
Enable the DAG optimization that converts vector div/rem with constants into
multiply+shift sequences by expanding (scalarizing) them early. This is needed
since ISD::SMUL_LOHI is 'Custom' lowered on SystemZ, and will therefore not be
available to BuildSDIV after legalization.
Also provide better cost values for these instructions, based on how they will
be implemented (a constant divisor is cheaper).
Review: Ulrich Weigand
https://reviews.llvm.org/D53196
llvm-svn: 345321
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 82 |
3 files changed, 69 insertions, 39 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 53cd21c4236..d86737e2192 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -527,6 +527,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::BSWAP); + setTargetDAGCombine(ISD::SDIV); + setTargetDAGCombine(ISD::UDIV); + setTargetDAGCombine(ISD::SREM); + setTargetDAGCombine(ISD::UREM); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -5664,6 +5668,23 @@ SDValue SystemZTargetLowering::combineGET_CCMASK( return Select->getOperand(4); } +SDValue SystemZTargetLowering::combineIntDIVREM( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + // In the case where the divisor is a vector of constants a cheaper + // sequence of instructions can replace the divide. BuildSDIV is called to + // do this during DAG combining, but it only succeeds when it can build a + // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and + // since it is not Legal but Custom it can only happen before + // legalization. Therefore we must scalarize this early before Combine + // 1. For widened vectors, this is already the result of type legalization. 
+ if (VT.isVector() && isTypeLegal(VT) && + DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) + return DAG.UnrollVectorOp(N); + return SDValue(); +} + SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch(N->getOpcode()) { @@ -5681,6 +5702,10 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: return combineIntDIVREM(N, DCI); } return SDValue(); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 267e31a8521..4b6be9bff0a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -605,6 +605,7 @@ private: SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 1eaeb9699bf..f52c9ca6e49 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -362,27 +362,33 @@ int SystemZTTIImpl::getArithmeticInstrCost( unsigned ScalarBits = Ty->getScalarSizeInBits(); - // Div with a constant which is a power of 2 will be converted by - // DAGCombiner to use shifts. With vector shift-element instructions, a - // vector sdiv costs about as much as a scalar one. 
- const unsigned SDivCostEstimate = 4; - bool SDivPow2 = false; - bool UDivPow2 = false; - if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) && - Args.size() == 2) { - const ConstantInt *CI = nullptr; + // There are three cases of division and remainder: Dividing with a register + // needs a divide instruction. A divisor which is a power of two constant + // can be implemented with a sequence of shifts. Any other constant needs a + // multiply and shifts. + const unsigned DivInstrCost = 20; + const unsigned DivMulSeqCost = 10; + const unsigned SDivPow2Cost = 4; + + bool SignedDivRem = + Opcode == Instruction::SDiv || Opcode == Instruction::SRem; + bool UnsignedDivRem = + Opcode == Instruction::UDiv || Opcode == Instruction::URem; + + // Check for a constant divisor. + bool DivRemConst = false; + bool DivRemConstPow2 = false; + if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) { if (const Constant *C = dyn_cast<Constant>(Args[1])) { - if (C->getType()->isVectorTy()) - CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue()); + const ConstantInt *CVal = + (C->getType()->isVectorTy() + ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue()) + : dyn_cast<const ConstantInt>(C)); + if (CVal != nullptr && + (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2())) + DivRemConstPow2 = true; else - CI = dyn_cast<const ConstantInt>(C); - } - if (CI != nullptr && - (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) { - if (Opcode == Instruction::SDiv) - SDivPow2 = true; - else - UDivPow2 = true; + DivRemConst = true; } } @@ -394,18 +400,19 @@ int SystemZTTIImpl::getArithmeticInstrCost( // These vector operations are custom handled, but are still supported // with one instruction per vector, regardless of element size. 
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr || - Opcode == Instruction::AShr || UDivPow2) { + Opcode == Instruction::AShr) { return NumVectors; } - if (SDivPow2) - return (NumVectors * SDivCostEstimate); - - // Temporary hack: disable high vectorization factors with integer - // division/remainder, which will get scalarized and handled with GR128 - // registers. The mischeduler is not clever enough to avoid spilling yet. - if ((Opcode == Instruction::UDiv || Opcode == Instruction::SDiv || - Opcode == Instruction::URem || Opcode == Instruction::SRem) && VF > 4) + if (DivRemConstPow2) + return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1)); + if (DivRemConst) + return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args); + if ((SignedDivRem || UnsignedDivRem) && VF > 4) + // Temporary hack: disable high vectorization factors with integer + // division/remainder, which will get scalarized and handled with + // GR128 registers. The mischeduler is not clever enough to avoid + // spilling yet. return 1000; // These FP operations are supported with a single vector instruction for @@ -471,19 +478,16 @@ int SystemZTTIImpl::getArithmeticInstrCost( return 7; // 2 * ipm sequences ; xor ; shift ; compare } - if (UDivPow2) - return 1; - if (SDivPow2) - return SDivCostEstimate; - - // An extra extension for narrow types is needed. - if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem)) + if (DivRemConstPow2) + return (SignedDivRem ? SDivPow2Cost : 1); + if (DivRemConst) + return DivMulSeqCost; + if (SignedDivRem) // sext of op(s) for narrow types - return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1)); - - if (Opcode == Instruction::UDiv || Opcode == Instruction::URem) + return DivInstrCost + (ScalarBits < 32 ? 3 : (ScalarBits == 32 ? 1 : 0)); + if (UnsignedDivRem) // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r - return (ScalarBits < 32 ? 4 : 2); + return DivInstrCost + (ScalarBits < 32 ? 
3 : 1); } // Fallback to the default implementation. |