summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp22
-rw-r--r--llvm/test/Analysis/CostModel/AArch64/div_cte.ll45
2 files changed, 67 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 316ea048436..098272dc2e2 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -520,6 +520,28 @@ int AArch64TTIImpl::getArithmeticInstrCost(
}
LLVM_FALLTHROUGH;
case ISD::UDIV:
+ if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
+ auto VT = TLI->getValueType(DL, Ty);
+ if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
+ // Vector signed division by a constant is expanded to the
+ // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
+ // to MULHU + SUB + SRL + ADD + SRL.
+ int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
+ Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
+ Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
+ Opd2Info,
+ TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
+ }
+ }
+
Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
Opd1PropInfo, Opd2PropInfo);
if (Ty->isVectorTy()) {
diff --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
new file mode 100644
index 00000000000..ab7fc23cdad
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
@@ -0,0 +1,45 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s
+
+; Verify the cost of integer division by constant.
+
+define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
+; CHECK-LABEL: 'Cost Model Analysis' for function 'sdiv8xi16':
+; CHECK: Found an estimated cost of 9 for instruction: %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+ %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+ ret <16 x i8> %div
+}
+
+define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
+; CHECK-LABEL: 'Cost Model Analysis' for function 'sdiv16xi8':
+; CHECK: Found an estimated cost of 9 for instruction: %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ ret <8 x i16> %div
+}
+
+define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
+; CHECK-LABEL: 'Cost Model Analysis' for function 'sdiv32xi4':
+; CHECK: Found an estimated cost of 9 for instruction: %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+ %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+ ret <4 x i32> %div
+}
+
+define <16 x i8> @udiv8xi16(<16 x i8> %x) {
+; CHECK-LABEL: 'Cost Model Analysis' for function 'udiv8xi16':
+; CHECK: Found an estimated cost of 9 for instruction: %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+ %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+ ret <16 x i8> %div
+}
+
+define <8 x i16> @udiv16xi8(<8 x i16> %x) {
+; CHECK-LABEL: 'Cost Model Analysis' for function 'udiv16xi8':
+; CHECK: Found an estimated cost of 9 for instruction: %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+ ret <8 x i16> %div
+}
+
+define <4 x i32> @udiv32xi4(<4 x i32> %x) {
+; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'udiv32xi4':
+; CHECK: Found an estimated cost of 9 for instruction: %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+ %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+ ret <4 x i32> %div
+}
OpenPOWER on IntegriCloud