diff options
author | James Molloy <james.molloy@arm.com> | 2016-09-26 07:26:24 +0000 |
---|---|---|
committer | James Molloy <james.molloy@arm.com> | 2016-09-26 07:26:24 +0000 |
commit | 9abb2fa5bb66785379c5b4cb46426cb61ee01c93 (patch) | |
tree | e127550fa15cdf122cbceac62e705ca84f8d530a /llvm/lib/Target/ARM/ARMISelLowering.cpp | |
parent | 9559f04b9db004375b10f79e30d0e321c62ab222 (diff) | |
download | bcm5719-llvm-9abb2fa5bb66785379c5b4cb46426cb61ee01c93.tar.gz bcm5719-llvm-9abb2fa5bb66785379c5b4cb46426cb61ee01c93.zip |
[ARM] Promote small global constants to constant pools
If a constant is unnamed_addr and is only used within one function, we can save
on the code size and runtime cost of an indirection by changing the global's storage
to inside the constant pool. For example, instead of:
ldr r0, .CPI0
bl printf
bx lr
.CPI0: &format_string
format_string: .asciz "hello, world!\n"
We can emit:
adr r0, .CPI0
bl printf
bx lr
.CPI0: .asciz "hello, world!\n"
This can cause significant code size savings when many small strings are used in one
function (4 bytes per string).
This recommit contains fixes for a nasty bug related to fast-isel fallback - because
fast-isel doesn't know about this optimization, if it runs and emits references to
a string that we inline (because fast-isel fell back to SDAG) we will end up
with an inlined string and also an out-of-line string, and we won't emit the
out-of-line string, causing backend failures.
It also contains fixes for emitting .text relocations which made the sanitizer
bots unhappy.
llvm-svn: 282387
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 434b4546483..018fb1617d6 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" @@ -59,12 +60,28 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); +STATISTIC(NumConstpoolPromoted, + "Number of constants with their storage promoted into constant pools"); static cl::opt<bool> ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); +static cl::opt<bool> EnableConstpoolPromotion( + "arm-promote-constant", cl::Hidden, + cl::desc("Enable / disable promotion of unnamed_addr constants into " + "constant pools"), + cl::init(true)); +static cl::opt<unsigned> ConstpoolPromotionMaxSize( + "arm-promote-constant-max-size", cl::Hidden, + cl::desc("Maximum size of constant to promote into a constant pool"), + cl::init(64)); +static cl::opt<unsigned> ConstpoolPromotionMaxTotal( + "arm-promote-constant-max-total", cl::Hidden, + cl::desc("Maximum size of ALL constants to promote into a constant pool"), + cl::init(128)); + namespace { class ARMCCState : public CCState { public: @@ -2963,6 +2980,154 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("bogus TLS model"); } +/// Return true if all users of V are within function F, looking through +/// ConstantExprs. 
+static bool allUsersAreInFunction(const Value *V, const Function *F) { + SmallVector<const User*,4> Worklist; + for (auto *U : V->users()) + Worklist.push_back(U); + while (!Worklist.empty()) { + auto *U = Worklist.pop_back_val(); + if (isa<ConstantExpr>(U)) { + for (auto *UU : U->users()) + Worklist.push_back(UU); + continue; + } + + auto *I = dyn_cast<Instruction>(U); + if (!I || I->getParent()->getParent() != F) + return false; + } + return true; +} + +/// Return true if all users of V are within some (any) function, looking through +/// ConstantExprs. In other words, are there any global constant users? +static bool allUsersAreInFunctions(const Value *V) { + SmallVector<const User*,4> Worklist; + for (auto *U : V->users()) + Worklist.push_back(U); + while (!Worklist.empty()) { + auto *U = Worklist.pop_back_val(); + if (isa<ConstantExpr>(U)) { + for (auto *UU : U->users()) + Worklist.push_back(UU); + continue; + } + + if (!isa<Instruction>(U)) + return false; + } + return true; +} + +// Return true if T is an integer, float or an array/vector of either. +static bool isSimpleType(Type *T) { + if (T->isIntegerTy() || T->isFloatingPointTy()) + return true; + Type *SubT = nullptr; + if (T->isArrayTy()) + SubT = T->getArrayElementType(); + else if (T->isVectorTy()) + SubT = T->getVectorElementType(); + else + return false; + return SubT->isIntegerTy() || SubT->isFloatingPointTy(); +} + +static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, + EVT PtrVT, SDLoc dl) { + // If we're creating a pool entry for a constant global with unnamed address, + // and the global is small enough, we can emit it inline into the constant pool + // to save ourselves an indirection. + // + // This is a win if the constant is only used in one function (so it doesn't + // need to be duplicated) or duplicating the constant wouldn't increase code + // size (implying the constant is no larger than 4 bytes). 
+ const Function *F = DAG.getMachineFunction().getFunction(); + + // We rely on this decision to inline being idemopotent and unrelated to the + // use-site. We know that if we inline a variable at one use site, we'll + // inline it elsewhere too (and reuse the constant pool entry). Fast-isel + // doesn't know about this optimization, so bail out if it's enabled else + // we could decide to inline here (and thus never emit the GV) but require + // the GV from fast-isel generated code. + if (!EnableConstpoolPromotion || + DAG.getMachineFunction().getTarget().Options.EnableFastISel) + return SDValue(); + + auto *GVar = dyn_cast<GlobalVariable>(GV); + if (!GVar || !GVar->hasInitializer() || + !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() || + !GVar->hasLocalLinkage()) + return SDValue(); + + // Ensure that we don't try and inline any type that contains pointers. If + // we inline a value that contains relocations, we move the relocations from + // .data to .text which is not ideal. + auto *Init = GVar->getInitializer(); + if (!isSimpleType(Init->getType())) + return SDValue(); + + // The constant islands pass can only really deal with alignment requests + // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote + // any type wanting greater alignment requirements than 4 bytes. We also + // can only promote constants that are multiples of 4 bytes in size or + // are paddable to a multiple of 4. Currently we only try and pad constants + // that are strings for simplicity. 
+ auto *CDAInit = dyn_cast<ConstantDataArray>(Init); + unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType()); + unsigned Align = DAG.getDataLayout().getABITypeAlignment(Init->getType()); + unsigned RequiredPadding = 4 - (Size % 4); + bool PaddingPossible = + RequiredPadding == 4 || (CDAInit && CDAInit->isString()); + if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize) + return SDValue(); + + unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // We can't bloat the constant pool too much, else the ConstantIslands pass + // may fail to converge. If we haven't promoted this global yet (it may have + // multiple uses), and promoting it would increase the constant pool size (Sz + // > 4), ensure we have space to do so up to MaxTotal. + if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4) + if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >= + ConstpoolPromotionMaxTotal) + return SDValue(); + + // This is only valid if all users are in a single function OR it has users + // in multiple functions but it no larger than a pointer. We also check if + // GVar has constant (non-ConstantExpr) users. If so, it essentially has its + // address taken. + if (!allUsersAreInFunction(GVar, F) && + !(Size <= 4 && allUsersAreInFunctions(GVar))) + return SDValue(); + + // We're going to inline this global. Pad it out if needed. 
+ if (RequiredPadding != 4) { + StringRef S = CDAInit->getAsString(); + + SmallVector<uint8_t,16> V(S.size()); + std::copy(S.bytes_begin(), S.bytes_end(), V.begin()); + while (RequiredPadding--) + V.push_back(0); + Init = ConstantDataArray::get(*DAG.getContext(), V); + } + + auto CPVal = ARMConstantPoolConstant::Create(GVar, Init); + SDValue CPAddr = + DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4); + if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) { + AFI->markGlobalAsPromotedToConstantPool(GVar); + AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() + + PaddedSize - 4); + } + ++NumConstpoolPromoted; + return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); +} + SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2974,6 +3139,11 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, bool IsRO = (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) || isa<Function>(GV); + + if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) + if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl)) + return V; + if (isPositionIndependent()) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); |