summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp82
1 files changed, 43 insertions, 39 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 1eaeb9699bf..f52c9ca6e49 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -362,27 +362,33 @@ int SystemZTTIImpl::getArithmeticInstrCost(
unsigned ScalarBits = Ty->getScalarSizeInBits();
- // Div with a constant which is a power of 2 will be converted by
- // DAGCombiner to use shifts. With vector shift-element instructions, a
- // vector sdiv costs about as much as a scalar one.
- const unsigned SDivCostEstimate = 4;
- bool SDivPow2 = false;
- bool UDivPow2 = false;
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv) &&
- Args.size() == 2) {
- const ConstantInt *CI = nullptr;
+ // There are thre cases of division and remainder: Dividing with a register
+ // needs a divide instruction. A divisor which is a power of two constant
+ // can be implemented with a sequence of shifts. Any other constant needs a
+ // multiply and shifts.
+ const unsigned DivInstrCost = 20;
+ const unsigned DivMulSeqCost = 10;
+ const unsigned SDivPow2Cost = 4;
+
+ bool SignedDivRem =
+ Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+ bool UnsignedDivRem =
+ Opcode == Instruction::UDiv || Opcode == Instruction::URem;
+
+ // Check for a constant divisor.
+ bool DivRemConst = false;
+ bool DivRemConstPow2 = false;
+ if ((SignedDivRem || UnsignedDivRem) && Args.size() == 2) {
if (const Constant *C = dyn_cast<Constant>(Args[1])) {
- if (C->getType()->isVectorTy())
- CI = dyn_cast_or_null<const ConstantInt>(C->getSplatValue());
+ const ConstantInt *CVal =
+ (C->getType()->isVectorTy()
+ ? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
+ : dyn_cast<const ConstantInt>(C));
+ if (CVal != nullptr &&
+ (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2()))
+ DivRemConstPow2 = true;
else
- CI = dyn_cast<const ConstantInt>(C);
- }
- if (CI != nullptr &&
- (CI->getValue().isPowerOf2() || (-CI->getValue()).isPowerOf2())) {
- if (Opcode == Instruction::SDiv)
- SDivPow2 = true;
- else
- UDivPow2 = true;
+ DivRemConst = true;
}
}
@@ -394,18 +400,19 @@ int SystemZTTIImpl::getArithmeticInstrCost(
// These vector operations are custom handled, but are still supported
// with one instruction per vector, regardless of element size.
if (Opcode == Instruction::Shl || Opcode == Instruction::LShr ||
- Opcode == Instruction::AShr || UDivPow2) {
+ Opcode == Instruction::AShr) {
return NumVectors;
}
- if (SDivPow2)
- return (NumVectors * SDivCostEstimate);
-
- // Temporary hack: disable high vectorization factors with integer
- // division/remainder, which will get scalarized and handled with GR128
- // registers. The mischeduler is not clever enough to avoid spilling yet.
- if ((Opcode == Instruction::UDiv || Opcode == Instruction::SDiv ||
- Opcode == Instruction::URem || Opcode == Instruction::SRem) && VF > 4)
+ if (DivRemConstPow2)
+ return (NumVectors * (SignedDivRem ? SDivPow2Cost : 1));
+ if (DivRemConst)
+ return VF * DivMulSeqCost + getScalarizationOverhead(Ty, Args);
+ if ((SignedDivRem || UnsignedDivRem) && VF > 4)
+ // Temporary hack: disable high vectorization factors with integer
+ // division/remainder, which will get scalarized and handled with
+ // GR128 registers. The mischeduler is not clever enough to avoid
+ // spilling yet.
return 1000;
// These FP operations are supported with a single vector instruction for
@@ -471,19 +478,16 @@ int SystemZTTIImpl::getArithmeticInstrCost(
return 7; // 2 * ipm sequences ; xor ; shift ; compare
}
- if (UDivPow2)
- return 1;
- if (SDivPow2)
- return SDivCostEstimate;
-
- // An extra extension for narrow types is needed.
- if ((Opcode == Instruction::SDiv || Opcode == Instruction::SRem))
+ if (DivRemConstPow2)
+ return (SignedDivRem ? SDivPow2Cost : 1);
+ if (DivRemConst)
+ return DivMulSeqCost;
+ if (SignedDivRem)
// sext of op(s) for narrow types
- return (ScalarBits < 32 ? 4 : (ScalarBits == 32 ? 2 : 1));
-
- if (Opcode == Instruction::UDiv || Opcode == Instruction::URem)
+ return DivInstrCost + (ScalarBits < 32 ? 3 : (ScalarBits == 32 ? 1 : 0));
+ if (UnsignedDivRem)
// Clearing of low 64 bit reg + sext of op(s) for narrow types + dl[g]r
- return (ScalarBits < 32 ? 4 : 2);
+ return DivInstrCost + (ScalarBits < 32 ? 3 : 1);
}
// Fallback to the default implementation.
OpenPOWER on IntegriCloud