summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorFlorian Hahn <florian.hahn@arm.com>2017-07-13 08:26:17 +0000
committerFlorian Hahn <florian.hahn@arm.com>2017-07-13 08:26:17 +0000
commit4adcfcf1d6409bd2202900033a44c21def89a4dd (patch)
treebe7a82b98d860617bb998186eb6f6c27f96b0272 /llvm/lib
parent9fb04071a21737729616dbfbbe7adf8c33294251 (diff)
downloadbcm5719-llvm-4adcfcf1d6409bd2202900033a44c21def89a4dd.tar.gz
bcm5719-llvm-4adcfcf1d6409bd2202900033a44c21def89a4dd.zip
[ARM] Inline callee if its target-features are a subset of the caller
Summary: Similar to X86, it should be safe to inline callees if their target-features are a subset of the caller. As some subtarget features provide different instructions depending on whether they are set or unset (e.g. ThumbMode and ModeSoftFloat), we use a whitelist of target-features describing hardware capabilities only. Reviewers: kristof.beyls, rengolin, t.p.northover, SjoerdMeijer, peter.smith, silviu.baranga, efriedma Reviewed By: SjoerdMeijer, efriedma Subscribers: dschuff, efriedma, aemerson, sdardis, javed.absar, arichardson, eraman, llvm-commits Differential Revision: https://reviews.llvm.org/D34697 llvm-svn: 307889
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp18
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h36
2 files changed, 54 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 8eb9dbf5f9d..51b0fedd2b5 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -15,6 +15,24 @@ using namespace llvm;
#define DEBUG_TYPE "armtti"
+bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
+ const Function *Callee) const {
+ const TargetMachine &TM = getTLI()->getTargetMachine();
+ const FeatureBitset &CallerBits =
+ TM.getSubtargetImpl(*Caller)->getFeatureBits();
+ const FeatureBitset &CalleeBits =
+ TM.getSubtargetImpl(*Callee)->getFeatureBits();
+
+ // To inline a callee, all features not in the whitelist must match exactly.
+ bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
+ (CalleeBits & ~InlineFeatureWhitelist);
+ // For features in the whitelist, the callee's features must be a subset of
+ // the callers'.
+ bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
+ (CalleeBits & InlineFeatureWhitelist);
+ return MatchExact && MatchSubset;
+}
+
int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
assert(Ty->isIntegerTy());
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 8a1a3786387..0695a4e6334 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -33,6 +33,39 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
const ARMSubtarget *ST;
const ARMTargetLowering *TLI;
+ // Currently the following features are excluded from InlineFeatureWhitelist.
+ // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16
+ // Depending on whether they are set or unset, different
+ // instructions/registers are available. For example, inlining a callee with
+ // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
+ // fail if the callee uses ARM only instructions, e.g. in inline asm.
+ const FeatureBitset InlineFeatureWhitelist = {
+ ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
+ ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
+ ARM::FeatureFullFP16, ARM::FeatureHWDivThumb,
+ ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
+ ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
+ ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
+ ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
+ ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
+ ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
+ ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
+ ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
+ ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
+ ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
+ ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
+ ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
+ ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding,
+ ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR,
+ ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp,
+ ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor,
+ ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
+ ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
+ ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
+ ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt,
+ ARM::FeatureNoNegativeImmediates
+ };
+
const ARMSubtarget *getST() const { return ST; }
const ARMTargetLowering *getTLI() const { return TLI; }
@@ -41,6 +74,9 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
+ bool areInlineCompatible(const Function *Caller,
+ const Function *Callee) const;
+
bool enableInterleavedAccessVectorization() { return true; }
/// Floating-point computation using ARMv8 AArch32 Advanced
OpenPOWER on IntegriCloud