summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Eli Friedman <efriedma@codeaurora.org> 2017-12-22 02:08:08 +0000
committer Eli Friedman <efriedma@codeaurora.org> 2017-12-22 02:08:08 +0000
commit 39ed9a602bff45e86c9249d5139504b398bb4ef4 (patch)
tree 5355c96d10ab486759bccab89fa89581cefc407d
parent a17f220590edc34b6c59c385c59312b3e9da524c (diff)
download bcm5719-llvm-39ed9a602bff45e86c9249d5139504b398bb4ef4.tar.gz
bcm5719-llvm-39ed9a602bff45e86c9249d5139504b398bb4ef4.zip
[Inliner] Restrict soft-float inlining penalty.
The penalty is currently getting applied in a bunch of places where it doesn't make sense, like bitcasts (which are free) and calls (which were getting the call penalty applied twice). Instead, just apply the penalty to binary operators and floating-point casts.

While I'm here, also fix getFPOpCost() to do the right thing in more cases, so we don't have to dig into function attributes.

Differential Revision: https://reviews.llvm.org/D41522

llvm-svn: 321332
-rw-r--r-- llvm/include/llvm/CodeGen/BasicTTIImpl.h 10
-rw-r--r-- llvm/lib/Analysis/InlineCost.cpp 34
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp 19
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.h 2
-rw-r--r-- llvm/test/Transforms/Inline/ARM/inline-fp.ll 113
5 files changed, 143 insertions, 35 deletions
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index bb5e7f9e8e3..526ddb1b970 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -302,9 +302,13 @@ public:
}
unsigned getFPOpCost(Type *Ty) {
- // By default, FP instructions are no more expensive since they are
- // implemented in HW. Target specific TTI can override this.
- return TargetTransformInfo::TCC_Basic;
+ // Check whether FADD is available, as a proxy for floating-point in
+ // general.
+ const TargetLoweringBase *TLI = getTLI();
+ EVT VT = TLI->getValueType(DL, Ty);
+ if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
+ return TargetTransformInfo::TCC_Basic;
+ return TargetTransformInfo::TCC_Expensive;
}
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 9ba1e119494..8d186e2d5af 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -701,6 +701,22 @@ bool CallAnalyzer::visitCastInst(CastInst &I) {
// Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
disableSROA(I.getOperand(0));
+ // If this is a floating-point cast, and the target says this operation
+ // is expensive, this may eventually become a library call. Treat the cost
+ // as such.
+ switch (I.getOpcode()) {
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
+ Cost += InlineConstants::CallPenalty;
+ default:
+ break;
+ }
+
return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
}
@@ -1079,6 +1095,13 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
disableSROA(LHS);
disableSROA(RHS);
+ // If the instruction is floating point, and the target says this operation
+ // is expensive, this may eventually become a library call. Treat the cost
+ // as such.
+ if (I.getType()->isFloatingPointTy() &&
+ TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
+ Cost += InlineConstants::CallPenalty;
+
return false;
}
@@ -1548,17 +1571,6 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
- // If the instruction is floating point, and the target says this operation
- // is expensive or the function has the "use-soft-float" attribute, this may
- // eventually become a library call. Treat the cost as such.
- if (I->getType()->isFloatingPointTy()) {
- // If the function has the "use-soft-float" attribute, mark it as
- // expensive.
- if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
- (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
- Cost += InlineConstants::CallPenalty;
- }
-
// If the instruction simplified to a constant, there is no cost to this
// instruction. Visit the instructions using our InstVisitor to account for
// all of the per-instruction logic. The visit tree returns true if we
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index cae01e415ef..43d7888075b 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -394,25 +394,6 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
return 1;
}
-int ARMTTIImpl::getFPOpCost(Type *Ty) {
- // Use similar logic that's in ARMISelLowering:
- // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
- // to VFP.
-
- if (ST->hasVFP2() && !ST->isThumb1Only()) {
- if (Ty->isFloatTy()) {
- return TargetTransformInfo::TCC_Basic;
- }
-
- if (Ty->isDoubleTy()) {
- return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
- TargetTransformInfo::TCC_Basic;
- }
- }
-
- return TargetTransformInfo::TCC_Expensive;
-}
-
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 99353a3219a..cd9fa070902 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -156,8 +156,6 @@ public:
int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
const SCEV *Ptr);
- int getFPOpCost(Type *Ty);
-
int getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
diff --git a/llvm/test/Transforms/Inline/ARM/inline-fp.ll b/llvm/test/Transforms/Inline/ARM/inline-fp.ll
new file mode 100644
index 00000000000..b4e76dfc7d2
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ARM/inline-fp.ll
@@ -0,0 +1,113 @@
+; RUN: opt -S -inline -mtriple=arm-eabi -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=NOFP
+; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2 -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=FULLFP
+; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2,+fp-only-sp -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=SINGLEFP
+; Make sure that soft float implementations are calculated as being more expensive
+; to the inliner.
+
+; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; NOFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+; FULLFP-DAG: single inlined into test_single with cost=0 (threshold=75)
+; FULLFP-DAG: single inlined into test_single with cost=-15000 (threshold=75)
+; FULLFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; FULLFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; FULLFP-DAG: double inlined into test_double with cost=0 (threshold=75)
+; FULLFP-DAG: double inlined into test_double with cost=-15000 (threshold=75)
+; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+; SINGLEFP-DAG: single inlined into test_single with cost=0 (threshold=75)
+; SINGLEFP-DAG: single inlined into test_single with cost=-15000 (threshold=75)
+; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with cost=-15 (threshold=75)
+; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with cost=-15015 (threshold=75)
+; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75)
+
+define i32 @test_single(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+ %call = call float @single(i32 %a, i8 zeroext %b)
+ %call2 = call float @single(i32 %c, i8 zeroext %d)
+ ret i32 0
+}
+
+define i32 @test_single_cheap(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+ %call = call float @single_cheap(i32 %a, i8 zeroext %b)
+ %call2 = call float @single_cheap(i32 %c, i8 zeroext %d)
+ ret i32 0
+}
+
+define i32 @test_double(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+ %call = call double @double(i32 %a, i8 zeroext %b)
+ %call2 = call double @double(i32 %c, i8 zeroext %d)
+ ret i32 0
+}
+
+define i32 @test_single_force_soft(i32 %a, i8 %b, i32 %c, i8 %d) #1 {
+ %call = call float @single_force_soft(i32 %a, i8 zeroext %b) #1
+ %call2 = call float @single_force_soft(i32 %c, i8 zeroext %d) #1
+ ret i32 0
+}
+
+define internal float @single(i32 %response, i8 zeroext %value1) #0 {
+entry:
+ %conv = zext i8 %value1 to i32
+ %sub = add nsw i32 %conv, -1
+ %conv1 = sitofp i32 %sub to float
+ %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+ %mul = fmul float %0, 2.620000e+03
+ %conv2 = sitofp i32 %response to float
+ %sub3 = fsub float %conv2, %mul
+ %div = fdiv float %sub3, %mul
+ ret float %div
+}
+
+define internal float @single_cheap(i32 %response, i8 zeroext %value1) #0 {
+entry:
+ %conv = zext i8 %value1 to i32
+ %sub = add nsw i32 %conv, -1
+ %conv1 = bitcast i32 %sub to float
+ %conv2 = bitcast i32 %response to float
+ %0 = tail call float @llvm.pow.f32(float %conv2, float %conv1)
+ %1 = tail call float @llvm.pow.f32(float %0, float %0)
+ %2 = tail call float @llvm.pow.f32(float %1, float %1)
+ ret float %2
+}
+
+define internal double @double(i32 %response, i8 zeroext %value1) #0 {
+entry:
+ %conv = zext i8 %value1 to i32
+ %sub = add nsw i32 %conv, -1
+ %conv1 = sitofp i32 %sub to double
+ %0 = tail call double @llvm.pow.f64(double 0x3FF028F5C0000000, double %conv1)
+ %mul = fmul double %0, 2.620000e+03
+ %conv2 = sitofp i32 %response to double
+ %sub3 = fsub double %conv2, %mul
+ %div = fdiv double %sub3, %mul
+ ret double %div
+}
+
+define internal float @single_force_soft(i32 %response, i8 zeroext %value1) #1 {
+entry:
+ %conv = zext i8 %value1 to i32
+ %sub = add nsw i32 %conv, -1
+ %conv1 = sitofp i32 %sub to float
+ %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+ %mul = fmul float %0, 2.620000e+03
+ %conv2 = sitofp i32 %response to float
+ %sub3 = fsub float %conv2, %mul
+ %div = fdiv float %sub3, %mul
+ ret float %div
+}
+
+declare float @llvm.pow.f32(float, float) optsize minsize
+declare double @llvm.pow.f64(double, double) optsize minsize
+
+attributes #0 = { optsize }
+attributes #1 = { optsize "use-soft-float"="true" "target-features"="+soft-float" }
OpenPOWER on IntegriCloud