diff options
Diffstat (limited to 'llvm/lib/ExecutionEngine/Orc')
-rw-r--r-- | llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp | 222 | ||||
-rw-r--r-- | llvm/lib/ExecutionEngine/Orc/Speculation.cpp | 135 |
2 files changed, 301 insertions, 56 deletions
diff --git a/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp b/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp index 52e9919f57c..f22acf50419 100644 --- a/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp +++ b/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp @@ -7,24 +7,30 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/SpeculateAnalyses.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/ErrorHandling.h" + +#include <algorithm> namespace { using namespace llvm; -std::vector<const BasicBlock *> findBBwithCalls(const Function &F, - bool IndirectCall = false) { - std::vector<const BasicBlock *> BBs; +SmallVector<const BasicBlock *, 8> findBBwithCalls(const Function &F, + bool IndirectCall = false) { + SmallVector<const BasicBlock *, 8> BBs; auto findCallInst = [&IndirectCall](const Instruction &I) { - if (auto Call = dyn_cast<CallBase>(&I)) { - if (Call->isIndirectCall()) - return IndirectCall; - else - return true; - } else + if (auto Call = dyn_cast<CallBase>(&I)) + return Call->isIndirectCall() ? IndirectCall : true; + else return false; }; for (auto &BB : F) @@ -38,11 +44,12 @@ std::vector<const BasicBlock *> findBBwithCalls(const Function &F, // Implementations of Queries shouldn't need to lock the resources // such as LLVMContext, each argument (function) has a non-shared LLVMContext +// Plus, if Queries contain states necessary locking scheme should be provided. namespace llvm { namespace orc { // Collect direct calls only -void BlockFreqQuery::findCalles(const BasicBlock *BB, +void SpeculateQuery::findCalles(const BasicBlock *BB, DenseSet<StringRef> &CallesNames) { assert(BB != nullptr && "Traversing Null BB to find calls?"); @@ -59,7 +66,14 @@ void BlockFreqQuery::findCalles(const BasicBlock *BB, getCalledFunction(II); } -// blind calculation +bool SpeculateQuery::isStraightLine(const Function &F) { + return llvm::all_of(F.getBasicBlockList(), [](const BasicBlock &BB) { + return BB.getSingleSuccessor() != nullptr; + }); +} + +// BlockFreqQuery Implementations + size_t BlockFreqQuery::numBBToGet(size_t numBB) { // small CFG if (numBB < 4) @@ -71,12 +85,15 @@ size_t BlockFreqQuery::numBBToGet(size_t numBB) { return (numBB / 2) + (numBB / 4); } -BlockFreqQuery::ResultTy BlockFreqQuery:: -operator()(Function &F, FunctionAnalysisManager &FAM) { +BlockFreqQuery::ResultTy BlockFreqQuery::operator()(Function &F) { DenseMap<StringRef, DenseSet<StringRef>> CallerAndCalles; DenseSet<StringRef> Calles; SmallVector<std::pair<const BasicBlock *, uint64_t>, 8> BBFreqs; + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + auto IBBs = findBBwithCalls(F); if (IBBs.empty()) @@ -107,5 +124,184 @@ operator()(Function &F, FunctionAnalysisManager &FAM) { return CallerAndCalles; } + +// SequenceBBQuery Implementation +std::size_t SequenceBBQuery::getHottestBlocks(std::size_t TotalBlocks) { + if (TotalBlocks == 1) + return TotalBlocks; + return TotalBlocks / 2; +} + +// FIXME : find good implementation. +SequenceBBQuery::BlockListTy +SequenceBBQuery::rearrangeBB(const Function &F, const BlockListTy &BBList) { + BlockListTy RearrangedBBSet; + + for (auto &Block : F.getBasicBlockList()) + if (llvm::is_contained(BBList, &Block)) + RearrangedBBSet.push_back(&Block); + + assert(RearrangedBBSet.size() == BBList.size() && + "BasicBlock missing while rearranging?"); + return RearrangedBBSet; +} + +void SequenceBBQuery::traverseToEntryBlock(const BasicBlock *AtBB, + const BlockListTy &CallerBlocks, + const BackEdgesInfoTy &BackEdgesInfo, + const BranchProbabilityInfo *BPI, + VisitedBlocksInfoTy &VisitedBlocks) { + auto Itr = VisitedBlocks.find(AtBB); + if (Itr != VisitedBlocks.end()) { // already visited. + if (!Itr->second.Upward) + return; + Itr->second.Upward = false; + } else { + // Create hint for newly discoverd blocks. + WalkDirection BlockHint; + BlockHint.Upward = false; + // FIXME: Expensive Check + if (llvm::is_contained(CallerBlocks, AtBB)) + BlockHint.CallerBlock = true; + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } + + const_pred_iterator PIt = pred_begin(AtBB), EIt = pred_end(AtBB); + // Move this check to top, when we have code setup to launch speculative + // compiles for function in entry BB, this triggers the speculative compiles + // before running the program. + if (PIt == EIt) // No Preds. + return; + + DenseSet<const BasicBlock *> PredSkipNodes; + + // Since we are checking for predecessor's backedges, this Block + // occurs in second position. + for (auto &I : BackEdgesInfo) + if (I.second == AtBB) + PredSkipNodes.insert(I.first); + + // Skip predecessors which source of back-edges. + for (; PIt != EIt; ++PIt) + // checking EdgeHotness is cheaper + if (BPI->isEdgeHot(*PIt, AtBB) && !PredSkipNodes.count(*PIt)) + traverseToEntryBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); +} + +void SequenceBBQuery::traverseToExitBlock(const BasicBlock *AtBB, + const BlockListTy &CallerBlocks, + const BackEdgesInfoTy &BackEdgesInfo, + const BranchProbabilityInfo *BPI, + VisitedBlocksInfoTy &VisitedBlocks) { + auto Itr = VisitedBlocks.find(AtBB); + if (Itr != VisitedBlocks.end()) { // already visited. + if (!Itr->second.Downward) + return; + Itr->second.Downward = false; + } else { + // Create hint for newly discoverd blocks. + WalkDirection BlockHint; + BlockHint.Downward = false; + // FIXME: Expensive Check + if (llvm::is_contained(CallerBlocks, AtBB)) + BlockHint.CallerBlock = true; + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } + + succ_const_iterator PIt = succ_begin(AtBB), EIt = succ_end(AtBB); + if (PIt == EIt) // No succs. + return; + + // If there are hot edges, then compute SuccSkipNodes. + DenseSet<const BasicBlock *> SuccSkipNodes; + + // Since we are checking for successor's backedges, this Block + // occurs in first position. + for (auto &I : BackEdgesInfo) + if (I.first == AtBB) + SuccSkipNodes.insert(I.second); + + for (; PIt != EIt; ++PIt) + if (BPI->isEdgeHot(AtBB, *PIt) && !SuccSkipNodes.count(*PIt)) + traverseToExitBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); +} + +// Get Block frequencies for blocks and take most frquently executed block, +// walk towards the entry block from those blocks and discover the basic blocks +// with call. +SequenceBBQuery::BlockListTy +SequenceBBQuery::queryCFG(Function &F, const BlockListTy &CallerBlocks) { + + BlockFreqInfoTy BBFreqs; + VisitedBlocksInfoTy VisitedBlocks; + BackEdgesInfoTy BackEdgesInfo; + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + + auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); + + llvm::FindFunctionBackedges(F, BackEdgesInfo); + + for (const auto I : CallerBlocks) + BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()}); + + llvm::sort(BBFreqs, [](decltype(BBFreqs)::const_reference Bbf, + decltype(BBFreqs)::const_reference Bbs) { + return Bbf.second > Bbs.second; + }); + + ArrayRef<std::pair<const BasicBlock *, uint64_t>> HotBlocksRef(BBFreqs); + HotBlocksRef = + HotBlocksRef.drop_back(BBFreqs.size() - getHottestBlocks(BBFreqs.size())); + + BranchProbabilityInfo *BPI = + FAM.getCachedResult<BranchProbabilityAnalysis>(F); + + // visit NHotBlocks, + // traverse upwards to entry + // traverse downwards to end. + + for (auto I : HotBlocksRef) { + traverseToEntryBlock(I.first, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); + traverseToExitBlock(I.first, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); + } + + BlockListTy MinCallerBlocks; + for (auto &I : VisitedBlocks) + if (I.second.CallerBlock) + MinCallerBlocks.push_back(std::move(I.first)); + + return rearrangeBB(F, MinCallerBlocks); +} + +SpeculateQuery::ResultTy SequenceBBQuery::operator()(Function &F) { + // reduce the number of lists! + DenseMap<StringRef, DenseSet<StringRef>> CallerAndCalles; + DenseSet<StringRef> Calles; + BlockListTy SequencedBlocks; + BlockListTy CallerBlocks; + + CallerBlocks = findBBwithCalls(F); + if (CallerBlocks.empty()) + return None; + + if (isStraightLine(F)) + SequencedBlocks = rearrangeBB(F, CallerBlocks); + else + SequencedBlocks = queryCFG(F, CallerBlocks); + + for (auto BB : SequencedBlocks) + findCalles(BB, Calles); + + CallerAndCalles.insert({F.getName(), std::move(Calles)}); + return CallerAndCalles; +} + } // namespace orc } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp index 5ab583124fe..4f2ea92706c 100644 --- a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/Speculation.h" - #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -17,6 +16,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" #include <vector> @@ -36,11 +36,28 @@ void ImplSymbolMap::trackImpls(SymbolAliasMap ImplMaps, JITDylib *SrcJD) { } } +// Trigger Speculative Compiles. +void Speculator::speculateForEntryPoint(Speculator *Ptr, uint64_t StubId) { + assert(Ptr && " Null Address Received in orc_speculate_for "); + Ptr->speculateFor(StubId); +} + +Error Speculator::addSpeculationRuntime(JITDylib &JD, + MangleAndInterner &Mangle) { + JITEvaluatedSymbol ThisPtr(pointerToJITTargetAddress(this), + JITSymbolFlags::Exported); + JITEvaluatedSymbol SpeculateForEntryPtr( + pointerToJITTargetAddress(&speculateForEntryPoint), + JITSymbolFlags::Exported); + return JD.define(absoluteSymbols({ + {Mangle("__orc_speculator"), ThisPtr}, // Data Symbol + {Mangle("__orc_speculate_for"), SpeculateForEntryPtr} // Callable Symbol + })); +} + // If two modules, share the same LLVMContext, different threads must -// not access those modules concurrently, doing so leave the -// LLVMContext in in-consistent state. -// But here since each TSM has a unique Context associated with it, -// on locking is necessary! +// not access them concurrently without locking the associated LLVMContext +// this implementation follows this contract. void IRSpeculationLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { @@ -48,50 +65,82 @@ void IRSpeculationLayer::emit(MaterializationResponsibility R, assert(TSM.getContext().getContext() != nullptr && "Module with null LLVMContext?"); - // Instrumentation of runtime calls - auto &InContext = *TSM.getContext().getContext(); - auto SpeculatorVTy = StructType::create(InContext, "Class.Speculator"); - auto RuntimeCallTy = FunctionType::get( - Type::getVoidTy(InContext), - {SpeculatorVTy->getPointerTo(), Type::getInt64Ty(InContext)}, false); - auto RuntimeCall = - Function::Create(RuntimeCallTy, Function::LinkageTypes::ExternalLinkage, - "__orc_speculate_for", TSM.getModuleUnlocked()); - auto SpeclAddr = new GlobalVariable( - *TSM.getModuleUnlocked(), SpeculatorVTy, false, - GlobalValue::LinkageTypes::ExternalLinkage, nullptr, "__orc_speculator"); - - IRBuilder<> Mutator(InContext); - - // QueryAnalysis allowed to transform the IR source, one such example is - // Simplify CFG helps the static branch prediction heuristics! - for (auto &Fn : TSM.getModuleUnlocked()->getFunctionList()) { - if (!Fn.isDeclaration()) { - auto IRNames = QueryAnalysis(Fn, FAM); - // Instrument and register if Query has result - if (IRNames.hasValue()) { - Mutator.SetInsertPoint(&(Fn.getEntryBlock().front())); - auto ImplAddrToUint = - Mutator.CreatePtrToInt(&Fn, Type::getInt64Ty(InContext)); - Mutator.CreateCall(RuntimeCallTy, RuntimeCall, - {SpeclAddr, ImplAddrToUint}); - S.registerSymbols(internToJITSymbols(IRNames.getValue()), - &R.getTargetJITDylib()); + // Instrumentation of runtime calls, lock the Module + TSM.withModuleDo([this, &R](Module &M) { + auto &MContext = M.getContext(); + auto SpeculatorVTy = StructType::create(MContext, "Class.Speculator"); + auto RuntimeCallTy = FunctionType::get( + Type::getVoidTy(MContext), + {SpeculatorVTy->getPointerTo(), Type::getInt64Ty(MContext)}, false); + auto RuntimeCall = + Function::Create(RuntimeCallTy, Function::LinkageTypes::ExternalLinkage, + "__orc_speculate_for", &M); + auto SpeclAddr = new GlobalVariable( + M, SpeculatorVTy, false, GlobalValue::LinkageTypes::ExternalLinkage, + nullptr, "__orc_speculator"); + + IRBuilder<> Mutator(MContext); + + // QueryAnalysis allowed to transform the IR source, one such example is + // Simplify CFG helps the static branch prediction heuristics! + for (auto &Fn : M.getFunctionList()) { + if (!Fn.isDeclaration()) { + + auto IRNames = QueryAnalysis(Fn); + // Instrument and register if Query has result + if (IRNames.hasValue()) { + + // Emit globals for each function. + auto LoadValueTy = Type::getInt8Ty(MContext); + auto SpeculatorGuard = new GlobalVariable( + M, LoadValueTy, false, GlobalValue::LinkageTypes::InternalLinkage, + ConstantInt::get(LoadValueTy, 0), + "__orc_speculate.guard.for." + Fn.getName()); + SpeculatorGuard->setAlignment(1); + SpeculatorGuard->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); + + BasicBlock &ProgramEntry = Fn.getEntryBlock(); + // Create BasicBlocks before the program's entry basicblock + BasicBlock *SpeculateBlock = BasicBlock::Create( + MContext, "__orc_speculate.block", &Fn, &ProgramEntry); + BasicBlock *SpeculateDecisionBlock = BasicBlock::Create( + MContext, "__orc_speculate.decision.block", &Fn, SpeculateBlock); + + assert(SpeculateDecisionBlock == &Fn.getEntryBlock() && + "SpeculateDecisionBlock not updated?"); + Mutator.SetInsertPoint(SpeculateDecisionBlock); + + auto LoadGuard = + Mutator.CreateLoad(LoadValueTy, SpeculatorGuard, "guard.value"); + // if just loaded value equal to 0,return true. + auto CanSpeculate = + Mutator.CreateICmpEQ(LoadGuard, ConstantInt::get(LoadValueTy, 0), + "compare.to.speculate"); + Mutator.CreateCondBr(CanSpeculate, SpeculateBlock, &ProgramEntry); + + Mutator.SetInsertPoint(SpeculateBlock); + auto ImplAddrToUint = + Mutator.CreatePtrToInt(&Fn, Type::getInt64Ty(MContext)); + Mutator.CreateCall(RuntimeCallTy, RuntimeCall, + {SpeclAddr, ImplAddrToUint}); + Mutator.CreateStore(ConstantInt::get(LoadValueTy, 1), + SpeculatorGuard); + Mutator.CreateBr(&ProgramEntry); + + assert(Mutator.GetInsertBlock()->getParent() == &Fn && + "IR builder association mismatch?"); + S.registerSymbols(internToJITSymbols(IRNames.getValue()), + &R.getTargetJITDylib()); + } } } - } - // No locking needed read only operation. - assert(!(verifyModule(*TSM.getModuleUnlocked())) && + }); + + assert(!TSM.withModuleDo([](const Module &M) { return verifyModule(M); }) && "Speculation Instrumentation breaks IR?"); NextLayer.emit(std::move(R), std::move(TSM)); } -// Runtime Function Implementation -extern "C" void __orc_speculate_for(Speculator *Ptr, uint64_t StubId) { - assert(Ptr && " Null Address Received in orc_speculate_for "); - Ptr->speculateFor(StubId); -} - } // namespace orc } // namespace llvm |