summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorJames Molloy <james.molloy@arm.com>2016-09-15 12:30:27 +0000
committerJames Molloy <james.molloy@arm.com>2016-09-15 12:30:27 +0000
commitfe7fd879d78c97a94a57d9beb7b5896f75491ae7 (patch)
treecea20653361a6b41b4a32549574a52d289eecd6d /llvm/lib/Target
parent50bc34ca31d1499472a1117ded41be34e4c44067 (diff)
downloadbcm5719-llvm-fe7fd879d78c97a94a57d9beb7b5896f75491ae7.tar.gz
bcm5719-llvm-fe7fd879d78c97a94a57d9beb7b5896f75491ae7.zip
[ARM] Promote small global constants to constant pools
If a constant is unnamed_addr and is only used within one function, we can save on the code size and runtime cost of an indirection by changing the global's storage to inside the constant pool. For example, instead of:

      ldr r0, .CPI0
      bl printf
      bx lr
    .CPI0: &format_string
    format_string: .asciz "hello, world!\n"

We can emit:

      adr r0, .CPI0
      bl printf
      bx lr
    .CPI0: .asciz "hello, world!\n"

This can cause significant code size savings when many small strings are used in one function (4 bytes per string).

This recommit contains fixes for a nasty bug related to fast-isel fallback. Fast-isel doesn't know about this optimization, so if it runs and emits references to a string that we then inline (because fast-isel fell back to SDAG for another use), we end up with both an inlined copy and references to the out-of-line string. Since we never emit the out-of-line string, this causes backend failures.

llvm-svn: 281604
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.cpp13
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.h9
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp121
-rw-r--r--llvm/lib/Target/ARM/ARMMachineFunctionInfo.h13
4 files changed, 155 insertions, 1 deletions
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 186c6431481..8d44a26b1db 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -97,6 +97,13 @@ void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
OutStreamer->EmitValue(E, Size);
}
+/// Emit \p GV through the generic AsmPrinter path unless its storage has been
+/// promoted into a constant pool.
+void ARMAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+  // A promoted global already lives in a constant pool, so printing it again
+  // as a standalone variable must be skipped.
+  const bool WasPromoted = PromotedGlobals.count(GV) != 0;
+  if (!WasPromoted)
+    AsmPrinter::EmitGlobalVariable(GV);
+}
+
/// runOnMachineFunction - This uses the EmitInstruction()
/// method to print assembly for each instruction.
///
@@ -109,6 +116,12 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const Function* F = MF.getFunction();
const TargetMachine& TM = MF.getTarget();
+ // Collect all globals that had their storage promoted to a constant pool.
+ // Functions are emitted before variables, so this accumulates promoted
+ // globals from all functions in PromotedGlobals.
+ for (auto *GV : AFI->getGlobalsPromotedToConstantPool())
+ PromotedGlobals.insert(GV);
+
// Calculate this function's optimization goal.
unsigned OptimizationGoal;
if (F->hasFnAttribute(Attribute::OptimizeNone))
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.h b/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 97f5ca0ecbc..30abdbc322a 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -56,6 +56,12 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// -1 if uninitialized, 0 if conflicting goals
int OptimizationGoals;
+ /// List of globals that have had their storage promoted to a constant
+ /// pool. This lives between calls to runOnMachineFunction and collects
+ /// data from every MachineFunction. It is used during doFinalization
+ /// when all non-function globals are emitted.
+ SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
+
public:
explicit ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
@@ -90,7 +96,8 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
-
+ void EmitGlobalVariable(const GlobalVariable *GV) override;
+
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 434b4546483..0d75d5eb7ae 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -59,12 +59,24 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
+STATISTIC(NumConstpoolPromoted, // Bumped each time promoteToConstantPool succeeds.
+ "Number of constants with their storage promoted into constant pools");
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
+static cl::opt<bool> EnableConstpoolPromotion( // Gate checked by promoteToConstantPool.
+ "arm-promote-constant", cl::Hidden,
+ cl::desc("Enable / disable promotion of unnamed_addr constants into "
+ "constant pools"),
+ cl::init(true)); // Promotion is enabled unless the flag turns it off.
+static cl::opt<unsigned> ConstpoolPromotionMaxSize( // Upper bound on a promoted initializer.
+ "arm-promote-constant-max-size", cl::Hidden,
+ cl::desc("Maximum size of constant to promote into a constant pool"),
+ cl::init(64)); // Compared against the initializer's alloc size from DataLayout.
+
namespace {
class ARMCCState : public CCState {
public:
@@ -2963,6 +2975,110 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("bogus TLS model");
}
+/// Check that every use of \p V, seen through any chain of ConstantExprs,
+/// is an instruction that belongs to function \p F.
+static bool allUsersAreInFunction(const Value *V, const Function *F) {
+  // Seed the pending list with the direct users of V.
+  SmallVector<const User*,4> Pending(V->user_begin(), V->user_end());
+  while (!Pending.empty()) {
+    const User *Cur = Pending.pop_back_val();
+
+    // A ConstantExpr is not a real use site; examine its own users instead.
+    if (isa<ConstantExpr>(Cur)) {
+      Pending.append(Cur->user_begin(), Cur->user_end());
+      continue;
+    }
+
+    // Any non-instruction user, or an instruction that lives in a different
+    // function, disqualifies V.
+    const auto *Inst = dyn_cast<Instruction>(Cur);
+    if (!Inst)
+      return false;
+    if (Inst->getParent()->getParent() != F)
+      return false;
+  }
+  return true;
+}
+
+/// Check that every use of \p V, seen through any chain of ConstantExprs,
+/// is an instruction (in whichever function). A false result means some
+/// non-instruction user, such as another global's initializer, refers to V.
+static bool allUsersAreInFunctions(const Value *V) {
+  // Seed the pending list with the direct users of V.
+  SmallVector<const User*,4> Pending(V->user_begin(), V->user_end());
+  while (!Pending.empty()) {
+    const User *Cur = Pending.pop_back_val();
+
+    // Instructions are acceptable users; nothing further to inspect.
+    if (isa<Instruction>(Cur))
+      continue;
+
+    // The only other user kind that is looked through is a ConstantExpr;
+    // anything else is a user outside of function bodies.
+    if (!isa<ConstantExpr>(Cur))
+      return false;
+    Pending.append(Cur->user_begin(), Cur->user_end());
+  }
+  return true;
+}
+
+/// Try to move the storage of \p GV into the current function's constant pool.
+///
+/// On success the global is recorded in ARMFunctionInfo as promoted and an
+/// ARMISD::Wrapper node addressing the constant pool entry is returned. An
+/// empty SDValue is returned when no promotion is performed.
+///
+/// \param GV    Global whose address is being lowered.
+/// \param DAG   Selection DAG of the function being compiled.
+/// \param PtrVT Value type used for the constant pool address.
+/// \param dl    Debug location attached to the created node.
+static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
+                                     EVT PtrVT, const SDLoc &dl) {
+  // If we're creating a pool entry for a constant global with unnamed address,
+  // and the global is small enough, we can emit it inline into the constant
+  // pool to save ourselves an indirection.
+  //
+  // This is a win if the constant is only used in one function (so it doesn't
+  // need to be duplicated) or duplicating the constant wouldn't increase code
+  // size (implying the constant is no larger than 4 bytes).
+  MachineFunction &MF = DAG.getMachineFunction();
+  const Function *F = MF.getFunction();
+
+  // We rely on this decision to inline being idempotent and unrelated to the
+  // use-site. We know that if we inline a variable at one use site, we'll
+  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
+  // doesn't know about this optimization, so bail out if it's enabled else
+  // we could decide to inline here (and thus never emit the GV) but require
+  // the GV from fast-isel generated code.
+  if (!EnableConstpoolPromotion || MF.getTarget().Options.EnableFastISel)
+    return SDValue();
+
+  // Only local, constant, unnamed_addr variables with a known initializer
+  // are candidates.
+  auto *GVar = dyn_cast<GlobalVariable>(GV);
+  if (!GVar || !GVar->hasInitializer() || !GVar->isConstant() ||
+      !GVar->hasGlobalUnnamedAddr() || !GVar->hasLocalLinkage())
+    return SDValue();
+
+  // The constant islands pass can only really deal with alignment requests
+  // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
+  // any type wanting greater alignment requirements than 4 bytes. We also
+  // can only promote constants that are multiples of 4 bytes in size or
+  // are paddable to a multiple of 4. Currently we only try and pad constants
+  // that are strings for simplicity.
+  const Constant *Init = GVar->getInitializer();
+  auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
+  const DataLayout &DL = DAG.getDataLayout();
+
+  // Keep the full 64-bit size so that a huge initializer cannot wrap around
+  // and look small enough to promote.
+  uint64_t Size = DL.getTypeAllocSize(Init->getType());
+
+  // Honour an explicit alignment on the global as well as the ABI alignment
+  // of its type; the pool entry below is only ever 4-byte aligned.
+  unsigned Align =
+      std::max(DL.getABITypeAlignment(Init->getType()), GVar->getAlignment());
+
+  // RequiredPadding == 4 means the size is already a multiple of 4.
+  unsigned RequiredPadding = 4 - static_cast<unsigned>(Size % 4);
+  bool PaddingPossible =
+      RequiredPadding == 4 || (CDAInit && CDAInit->isString());
+
+  // A zero-sized initializer has nothing to place in the pool, so it is
+  // rejected together with the over-aligned and over-sized cases.
+  if (Size == 0 || !PaddingPossible || Align > 4 ||
+      Size > ConstpoolPromotionMaxSize)
+    return SDValue();
+
+  // NOTE(review): the second alternative duplicates a small constant into
+  // every function that uses it. Confirm that unnamed_addr permits such
+  // cloning (as opposed to merging only) before relying on it.
+  bool SingleFunctionUse = allUsersAreInFunction(GVar, F);
+  if (!SingleFunctionUse && !(Size <= 4 && allUsersAreInFunctions(GVar)))
+    return SDValue();
+
+  // Pad string initializers with trailing zero bytes up to a multiple of 4.
+  // PaddingPossible guarantees CDAInit is a string whenever padding is needed.
+  if (RequiredPadding != 4) {
+    StringRef S = CDAInit->getAsString();
+
+    SmallVector<uint8_t,16> V(S.size());
+    std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
+    while (RequiredPadding--)
+      V.push_back(0);
+    Init = ConstantDataArray::get(*DAG.getContext(), V);
+  }
+
+  SDValue CPAddr = DAG.getTargetConstantPool(Init, PtrVT, /*Align=*/4);
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  AFI->markGlobalAsPromotedToConstantPool(GVar);
+  ++NumConstpoolPromoted;
+  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+}
+
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2974,6 +3090,11 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
bool IsRO =
(isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
isa<Function>(GV);
+
+ if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+ if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
+ return V;
+
if (isPositionIndependent()) {
bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index f71497240ff..52ba6a49fd5 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -121,6 +121,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// copies.
bool IsSplitCSR;
+ /// Globals that have had their storage promoted into the constant pool.
+ SmallVector<const GlobalVariable*,2> PromotedGlobals;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -226,6 +229,16 @@ public:
}
return It;
}
+
+  /// Record that the storage of \c GV has been moved inside a constant pool,
+  /// so the backend no longer needs to emit it as a global variable.
+  /// The pointer is appended as-is; no de-duplication is done here.
+  void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV) {
+    PromotedGlobals.emplace_back(GV);
+  }
+  /// Return the globals recorded via markGlobalAsPromotedToConstantPool, in
+  /// the order they were recorded. The returned ArrayRef is a view of this
+  /// object's storage, not a copy. The accessor does not modify the object,
+  /// so it is const-qualified.
+  ArrayRef<const GlobalVariable*> getGlobalsPromotedToConstantPool() const {
+    return PromotedGlobals;
+  }
};
} // End llvm namespace
OpenPOWER on IntegriCloud