diff options
author | Peter Collingbourne <peter@pcc.me.uk> | 2015-02-25 20:42:41 +0000 |
---|---|---|
committer | Peter Collingbourne <peter@pcc.me.uk> | 2015-02-25 20:42:41 +0000 |
commit | eba7f73ff953c4b8d4e26abbc9d4e76142b8fb8c (patch) | |
tree | 4838b6258e3bbfa686e093526239a2cdfc058a81 /llvm/lib/Transforms | |
parent | 66bc9080d6d5ef4904f25d70c374a3dc2c218c9f (diff) | |
download | bcm5719-llvm-eba7f73ff953c4b8d4e26abbc9d4e76142b8fb8c.tar.gz bcm5719-llvm-eba7f73ff953c4b8d4e26abbc9d4e76142b8fb8c.zip |
LowerBitSets: Align referenced globals.
This change aligns globals to the next highest power of 2 bytes, up to a
maximum of 128. This makes it more likely that we will be able to compress
bit sets with a greater alignment. In many more cases, we can now take
advantage of a new optimization also introduced in this patch that removes
bit set checks if the bit set is all ones.
The 128 byte maximum was found to provide the best tradeoff between instruction
overhead and data overhead in a recent build of Chromium. It allows us to
remove ~2.4MB of instructions at the cost of ~250KB of data.
Differential Revision: http://reviews.llvm.org/D7873
llvm-svn: 230540
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/IPO/LowerBitSets.cpp | 62 |
1 files changed, 40 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/IPO/LowerBitSets.cpp b/llvm/lib/Transforms/IPO/LowerBitSets.cpp index 032d6484ea2..0a22a809c53 100644 --- a/llvm/lib/Transforms/IPO/LowerBitSets.cpp +++ b/llvm/lib/Transforms/IPO/LowerBitSets.cpp @@ -157,6 +157,7 @@ struct LowerBitSets : public ModulePass { const DataLayout *DL; IntegerType *Int1Ty; + IntegerType *Int8Ty; IntegerType *Int32Ty; Type *Int32PtrTy; IntegerType *Int64Ty; @@ -173,7 +174,7 @@ struct LowerBitSets : public ModulePass { const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); Value *createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal, Value *BitOffset); - void + Value * lowerBitSetCall(CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout); @@ -203,6 +204,7 @@ bool LowerBitSets::doInitialization(Module &M) { report_fatal_error("Data layout required"); Int1Ty = Type::getInt1Ty(M.getContext()); + Int8Ty = Type::getInt8Ty(M.getContext()); Int32Ty = Type::getInt32Ty(M.getContext()); Int32PtrTy = PointerType::getUnqual(Int32Ty); Int64Ty = Type::getInt64Ty(M.getContext()); @@ -293,19 +295,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, const BitSetInfo &BSI, } } -/// Lower a llvm.bitset.test call to its implementation. -void LowerBitSets::lowerBitSetCall( +/// Lower a llvm.bitset.test call to its implementation. Returns the value to +/// replace the call with. +Value *LowerBitSets::lowerBitSetCall( CallInst *CI, const BitSetInfo &BSI, GlobalVariable *BitSetGlobal, GlobalVariable *CombinedGlobal, const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) { Value *Ptr = CI->getArgOperand(0); - if (BSI.containsValue(DL, GlobalLayout, Ptr)) { - CI->replaceAllUsesWith( - ConstantInt::getTrue(BitSetGlobal->getParent()->getContext())); - CI->eraseFromParent(); - return; - } + if (BSI.containsValue(DL, GlobalLayout, Ptr)) + return ConstantInt::getTrue(BitSetGlobal->getParent()->getContext()); Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy); Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( @@ -317,12 +316,8 @@ void LowerBitSets::lowerBitSetCall( Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); - if (BSI.isSingleOffset()) { - Value *Eq = B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt); - CI->replaceAllUsesWith(Eq); - CI->eraseFromParent(); - return; - } + if (BSI.isSingleOffset()) + return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt); Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt); @@ -349,6 +344,10 @@ void LowerBitSets::lowerBitSetCall( Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize); Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst); + // If the bit set is all ones, testing against it is unnecessary. + if (BSI.isAllOnes()) + return OffsetInRange; + TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false); IRBuilder<> ThenB(Term); @@ -363,9 +362,7 @@ void LowerBitSets::lowerBitSetCall( PHINode *P = B.CreatePHI(Int1Ty, 2); P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB); P->addIncoming(Bit, ThenB.GetInsertBlock()); - - CI->replaceAllUsesWith(P); - CI->eraseFromParent(); + return P; } /// Given a disjoint set of bitsets and globals, layout the globals, build the @@ -376,8 +373,24 @@ void LowerBitSets::buildBitSetsFromGlobals( const std::vector<GlobalVariable *> &Globals) { // Build a new global with the combined contents of the referenced globals. std::vector<Constant *> GlobalInits; - for (GlobalVariable *G : Globals) + for (GlobalVariable *G : Globals) { GlobalInits.push_back(G->getInitializer()); + uint64_t InitSize = DL->getTypeAllocSize(G->getInitializer()->getType()); + + // Compute the amount of padding required to align the next element to the + // next power of 2. + uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize; + + // Cap at 128 was found experimentally to have a good data/instruction + // overhead tradeoff. + if (Padding > 128) + Padding = RoundUpToAlignment(InitSize, 128) - InitSize; + + GlobalInits.push_back( + ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); + } + if (!GlobalInits.empty()) + GlobalInits.pop_back(); Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); auto CombinedGlobal = new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, @@ -389,7 +402,8 @@ void LowerBitSets::buildBitSetsFromGlobals( // Compute the offsets of the original globals within the new global. DenseMap<GlobalVariable *, uint64_t> GlobalLayout; for (unsigned I = 0; I != Globals.size(); ++I) - GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I); + // Multiply by 2 to account for padding elements. + GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2); // For each bitset in this disjoint set... for (MDString *BS : BitSets) { @@ -406,7 +420,10 @@ void LowerBitSets::buildBitSetsFromGlobals( // Lower each call to llvm.bitset.test for this bitset. for (CallInst *CI : BitSetTestCallSites[BS]) { ++NumBitSetCallsLowered; - lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout); + Value *Lowered = + lowerBitSetCall(CI, BSI, BitSetGlobal, CombinedGlobal, GlobalLayout); + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); } } @@ -414,8 +431,9 @@ void LowerBitSets::buildBitSetsFromGlobals( // global from which we built the combined global, and replace references // to the original globals with references to the aliases. for (unsigned I = 0; I != Globals.size(); ++I) { + // Multiply by 2 to account for padding elements. Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, I)}; + ConstantInt::get(Int32Ty, I * 2)}; Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(CombinedGlobal, CombinedGlobalIdxs); GlobalAlias *GAlias = GlobalAlias::create( |