diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp | 82 |
3 files changed, 69 insertions, 39 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 53cd21c4236..d86737e2192 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -527,6 +527,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::BSWAP); + setTargetDAGCombine(ISD::SDIV); + setTargetDAGCombine(ISD::UDIV); + setTargetDAGCombine(ISD::SREM); + setTargetDAGCombine(ISD::UREM); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); @@ -5664,6 +5668,23 @@ SDValue SystemZTargetLowering::combineGET_CCMASK( return Select->getOperand(4); } +SDValue SystemZTargetLowering::combineIntDIVREM( + SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + // In the case where the divisor is a vector of constants a cheaper + // sequence of instructions can replace the divide. BuildSDIV is called to + // do this during DAG combining, but it only succeeds when it can build a + // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and + // since it is not Legal but Custom it can only happen before + // legalization. Therefore we must scalarize this early before Combine + // 1. For widened vectors, this is already the result of type legalization. 
+ if (VT.isVector() && isTypeLegal(VT) && + DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) + return DAG.UnrollVectorOp(N); + return SDValue(); +} + SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch(N->getOpcode()) { @@ -5681,6 +5702,10 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: return combineIntDIVREM(N, DCI); } return SDValue(); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h index 267e31a8521..4b6be9bff0a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -605,6 +605,7 @@ private: SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 1eaeb9699bf..f52c9ca6e49 100644 --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -362,27 +362,33 @@ int SystemZTTIImpl::getArithmeticInstrCost( unsigned ScalarBits = Ty->getScalarSizeInBits(); - // Div with a constant which is a power of 2 will be converted by - // DAGCombiner to use shifts. With vector shift-element instructions, a - // vector sdiv costs about as much as a scalar one. 
- const unsigned SDivCostEstimate = 4; - bool SDivPow2 = false; - bool UDivPow2 = false; - if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) && - Args.size() == 2) { - const ConstantInt *CI = nullptr; + // There are three cases of division and remainder: Dividing with a register + // needs a divide instruction. A divisor which is a power of two constant + // can be implemented with a sequence of shifts. Any other constant needs a + // multiply and shifts. + const unsigned DivInstrCost = 20; + const unsigned DivMulSeqCost = 10; + const unsigned SDivPow2Cost = 4; + + bool SignedDivRem = + Opcode == Instruction::SDiv || Opcode == Instruction::SRem; + bool UnsignedDivRem = + Opcode == Instruction::UDiv || Opcode == Instruction::URem; + + // Check for a constant divisor. + bool DivRemConst = false; + bool DivRemConstPow2 = false; + if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) { if (const Constant *C = dyn_cast<Constant>(Args[1])) { - if (C->getType()->isVectorTy()) - CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue()); + const ConstantInt *CVal = + (C->getType()->isVectorTy() + ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue()) + : dyn_cast<const ConstantInt>(C)); + if (CVal != nullptr && + (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2())) + DivRemConstPow2 = true; else - CI = dyn_cast<const ConstantInt>(C); - } - if (CI != nullptr && - (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) { - if (Opcode == Instruction::SDiv) - SDivPow2 = true; - else - UDivPow2 = true; + DivRemConst = true; } } @@ -394,18 +400,19 @@ int SystemZTTIImpl::getArithmeticInstrCost( // These vector operations are custom handled, but are still supported // with one instruction per vector, regardless of element size. 
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr || - Opcode == Instruction::AShr || UDivPow2) { + Opcode == Instruction::AShr) { return NumVectors; } - if (SDivPow2) - return (NumVectors * SDivCostEstimate); - - // Temporary hack: disable high vectorization factors with integer - // division/remainder, which will get scalarized and handled with GR128 - // registers. The mischeduler is not clever enough to avoid spilling yet. - if ((Opcode == Instruction::UDiv || Opcode == Instruction::SDiv || - Opcode == Instruction::URem || Opcode == Instruction::SRem) && VF > 4) + if (DivRemConstPow2) + return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1)); + if (DivRemConst) + return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args); + if ((SignedDivRem || UnsignedDivRem) && VF > 4) + // Temporary hack: disable high vectorization factors with integer + // division/remainder, which will get scalarized and handled with + // GR128 registers. The mischeduler is not clever enough to avoid + // spilling yet. return 1000; // These FP operations are supported with a single vector instruction for @@ -471,19 +478,16 @@ int SystemZTTIImpl::getArithmeticInstrCost( return 7; // 2 * ipm sequences ; xor ; shift ; compare } - if (UDivPow2) - return 1; - if (SDivPow2) - return SDivCostEstimate; - - // An extra extension for narrow types is needed. - if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem)) + if (DivRemConstPow2) + return (SignedDivRem ? SDivPow2Cost : 1); + if (DivRemConst) + return DivMulSeqCost; + if (SignedDivRem) // sext of op(s) for narrow types - return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1)); - - if (Opcode == Instruction::UDiv || Opcode == Instruction::URem) + return DivInstrCost + (ScalarBits < 32 ? 3 : (ScalarBits == 32 ? 1 : 0)); + if (UnsignedDivRem) // Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r - return (ScalarBits < 32 ? 4 : 2); + return DivInstrCost + (ScalarBits < 32 ? 
3 : 1); } // Fallback to the default implementation. |