summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp25
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h1
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp82
3 files changed, 69 insertions, 39 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 53cd21c4236..d86737e2192 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -527,6 +527,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::SDIV);
+ setTargetDAGCombine(ISD::UDIV);
+ setTargetDAGCombine(ISD::SREM);
+ setTargetDAGCombine(ISD::UREM);
// Handle intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -5664,6 +5668,23 @@ SDValue SystemZTargetLowering::combineGET_CCMASK(
return Select->getOperand(4);
}
+SDValue SystemZTargetLowering::combineIntDIVREM(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ // In the case where the divisor is a vector of constants a cheaper
+ // sequence of instructions can replace the divide. BuildSDIV is called to
+ // do this during DAG combining, but it only succeeds when it can build a
+ // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
+ // since it is not Legal but Custom it can only happen before
+ // legalization. Therefore we must scalarize this early before Combine
+ // 1. For widened vectors, this is already the result of type legalization.
+ if (VT.isVector() && isTypeLegal(VT) &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
+ return DAG.UnrollVectorOp(N);
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch(N->getOpcode()) {
@@ -5681,6 +5702,10 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM: return combineIntDIVREM(N, DCI);
}
return SDValue();
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 267e31a8521..4b6be9bff0a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -605,6 +605,7 @@ private:
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineGET_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineIntDIVREM(SDNode *N, DAGCombinerInfo &DCI) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 1eaeb9699bf..f52c9ca6e49 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -362,27 +362,33 @@ int SystemZTTIImpl::getArithmeticInstrCost(
unsigned ScalarBits = Ty->getScalarSizeInBits();
- // Div with a constant which is a power of 2 will be converted by
- // DAGCombiner to use shifts. With vector shift-element instructions, a
- // vector sdiv costs about as much as a scalar one.
- const unsigned SDivCostEstimate = 4;
- bool SDivPow2 = false;
- bool UDivPow2 = false;
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
- Args.size() == 2) {
- const ConstantInt *CI = nullptr;
+ // There are three cases of division and remainder: Dividing with a register
+ // needs a divide instruction. A divisor which is a power of two constant
+ // can be implemented with a sequence of shifts. Any other constant needs a
+ // multiply and shifts.
+ const unsigned DivInstrCost = 20;
+ const unsigned DivMulSeqCost = 10;
+ const unsigned SDivPow2Cost = 4;
+
+ bool SignedDivRem =
+ Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+ bool UnsignedDivRem =
+ Opcode == Instruction::UDiv || Opcode == Instruction::URem;
+
+ // Check for a constant divisor.
+ bool DivRemConst = false;
+ bool DivRemConstPow2 = false;
+ if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) {
if (const Constant *C = dyn_cast<Constant>(Args[1])) {
- if (C->getType()->isVectorTy())
- CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+ const ConstantInt *CVal =
+ (C->getType()->isVectorTy()
+ ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
+ : dyn_cast<const ConstantInt>(C));
+ if (CVal != nullptr &&
+ (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2()))
+ DivRemConstPow2 = true;
else
- CI = dyn_cast<const ConstantInt>(C);
- }
- if (CI != nullptr &&
- (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
- if (Opcode == Instruction::SDiv)
- SDivPow2 = true;
- else
- UDivPow2 = true;
+ DivRemConst = true;
}
}
@@ -394,18 +400,19 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// These vector operations are custom handled, but are still supported
// with one instruction per vector, regardless of element size.
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
- Opcode == Instruction::AShr || UDivPow2) {
+ Opcode == Instruction::AShr) {
return NumVectors;
}
- if (SDivPow2)
- return (NumVectors * SDivCostEstimate);
-
- // Temporary hack: disable high vectorization factors with integer
- // division/remainder, which will get scalarized and handled with GR128
- // registers. The mischeduler is not clever enough to avoid spilling yet.
- if ((Opcode == Instruction::UDiv || Opcode == Instruction::SDiv ||
- Opcode == Instruction::URem || Opcode == Instruction::SRem) && VF > 4)
+ if (DivRemConstPow2)
+ return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
+ if (DivRemConst)
+ return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+ if ((SignedDivRem || UnsignedDivRem) && VF > 4)
+ // Temporary hack: disable high vectorization factors with integer
+ // division/remainder, which will get scalarized and handled with
+ // GR128 registers. The mischeduler is not clever enough to avoid
+ // spilling yet.
return 1000;
// These FP operations are supported with a single vector instruction for
@@ -471,19 +478,16 @@ int SystemZTTIImpl::getArithmeticInstrCost(
return 7; // 2 * ipm sequences ; xor ; shift ; compare
}
- if (UDivPow2)
- return 1;
- if (SDivPow2)
- return SDivCostEstimate;
-
- // An extra extension for narrow types is needed.
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
+ if (DivRemConstPow2)
+ return (SignedDivRem ? SDivPow2Cost : 1);
+ if (DivRemConst)
+ return DivMulSeqCost;
+ if (SignedDivRem)
// sext of op(s) for narrow types
- return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
-
- if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
+ return DivInstrCost + (ScalarBits < 32 ? 3 : (ScalarBits == 32 ? 1 : 0));
+ if (UnsignedDivRem)
// Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
- return (ScalarBits < 32 ? 4 : 2);
+ return DivInstrCost + (ScalarBits < 32 ? 3 : 1);
}
// Fallback to the default implementation.
OpenPOWER on IntegriCloud