author    | Gor Nishanov <GorNishanov@gmail.com> | 2016-08-29 14:34:12 +0000
committer | Gor Nishanov <GorNishanov@gmail.com> | 2016-08-29 14:34:12 +0000
commit    | dce9b026773160769625a4809bf2dfbf4e636ef1
tree      | bc74ce0a7bee4b4baf97e789bf821a04fa0d2ce7 /llvm/lib/Transforms
parent    | b57d0a2fda00fd50f78dc89802b457072194a75a
[Coroutines] Part 9: Add cleanup subfunction.
Summary:
This patch completes coroutine heap allocation elision. The heap elision example from docs/Coroutines.rst now compiles and produces the expected result (see test/Transforms/Coroutines/ex3.ll).
Intrinsic Changes:
* coro.free gets a token parameter tying it to coro.id, so that all coro.frees associated with a particular coroutine can be reliably discovered.
* coro.id gets an extra parameter that points back to the coroutine function. This makes it possible to check whether a coro.id describes the enclosing function or belongs to a different function that was later inlined (illustrated in the fragment below).
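For illustration, here is a minimal hand-written IR fragment (not taken from the patch, the docs example, or ex3.ll; the function @f and the zero alignment are made up) showing the updated operand lists: coro.id now carries a pointer back to its enclosing coroutine, and coro.free is tied to the coro.id token, so a pass can find every coro.free of a coroutine by walking the users of that token. A real coroutine would also contain coro.begin, coro.suspend and coro.end, omitted here to keep the fragment short.

  declare token @llvm.coro.id(i32, i8*, i8*, i8*)
  declare i8* @llvm.coro.free(token, i8*)

  define i8* @f(i8* %hdl) {
  entry:
    ; the third operand now points back at the enclosing coroutine @f
    %id = call token @llvm.coro.id(i32 0, i8* null,
                                   i8* bitcast (i8* (i8*)* @f to i8*), i8* null)
    ; coro.free is tied to %id rather than matched up by other means
    %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
    ret i8* %mem
  }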
CoroSplit now creates three subfunctions:
# f$resume - resume logic
# f$destroy - cleanup logic followed by the deallocation code
# f$cleanup - just the cleanup code
During devirtualization, the CoroElide pass replaces coro.destroy with either f$destroy or f$cleanup, depending on whether heap elision is performed (sketched below).
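As a rough sketch of what devirtualization does in the caller (the names @f.destroy and @f.cleanup follow the suffixes CoroSplit uses for the clones, but the fragment itself is hand-written, not taken from the patch): before CoroElide runs, coro.destroy has already been lowered to an indirect call through llvm.coro.subfn.addr with index 1 (the destroy slot); after devirtualization it becomes a direct call, to f.destroy if the frame stays on the heap, or to f.cleanup if the allocation was elided, so no deallocation code runs.

  declare i8* @llvm.coro.subfn.addr(i8* nocapture readonly, i8)
  declare fastcc void @f.cleanup(i8*)

  ; Before: indirect call through the subfunction slot resolved at run time.
  define void @destroy_before(i8* %hdl) {
    %addr = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1) ; 1 = DestroyIndex
    %fn = bitcast i8* %addr to void (i8*)*
    call fastcc void %fn(i8* %hdl)
    ret void
  }

  ; After, with heap elision: direct call to the cleanup clone, which runs
  ; the cleanup code but does not free the (now elided) coroutine frame.
  define void @destroy_after(i8* %hdl) {
    call fastcc void @f.cleanup(i8* %hdl)
    ret void
  }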
Other fixes and improvements:
* Fixed a buglet in Shape::buildFrom where coro.save was not created properly when a coroutine has more than one suspend point.
* Switched to a variable-width suspend index field: it is no longer limited to 32 bits and can be as narrow as i1 or as wide as i<whatever-size_t-is> (see the sketch after this list).
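A small sketch of the effect on the frame layout (the type name and field order follow the frame built in CoroFrame.cpp, but the example itself is invented): a coroutine with three suspend points needs only ceil(log2(3)) = 2 bits of index, so the third field becomes i2 instead of a fixed i32; any spilled values would follow these known fields.

  %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i2 }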
Reviewers: majnemer
Subscribers: llvm-commits, mehdi_amini
Differential Revision: https://reviews.llvm.org/D23844
llvm-svn: 279971
Diffstat (limited to 'llvm/lib/Transforms')
llvm/lib/Transforms/Coroutines/CoroEarly.cpp   |  1
llvm/lib/Transforms/Coroutines/CoroElide.cpp   | 72
llvm/lib/Transforms/Coroutines/CoroFrame.cpp   | 25
llvm/lib/Transforms/Coroutines/CoroInstr.h     | 24
llvm/lib/Transforms/Coroutines/CoroInternal.h  | 16
llvm/lib/Transforms/Coroutines/CoroSplit.cpp   | 44
llvm/lib/Transforms/Coroutines/Coroutines.cpp  | 23
7 files changed, 140 insertions, 65 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 3e464fefe81..eed4a98fc91 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -81,6 +81,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) { if (CII->getInfo().isPreSplit()) { F.addFnAttr(CORO_PRESPLIT_ATTR, UNPREPARED_FOR_SPLIT); setCannotDuplicate(CII); + CII->setCoroutineSelf(); } } break; diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index 03b06731f9d..99974d8da64 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -35,6 +35,7 @@ struct Lowerer : coro::LowererBase { Lowerer(Module &M) : LowererBase(M) {} void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA); + bool shouldElide() const; bool processCoroId(CoroIdInst *, AAResults &AA); }; } // end anonymous namespace @@ -122,14 +123,6 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) { CA->eraseFromParent(); } - // To suppress deallocation code, we replace all llvm.coro.free intrinsics - // associated with this coro.begin with null constant. - auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C)); - for (auto *CF : CoroFrees) { - CF->replaceAllUsesWith(NullPtr); - CF->eraseFromParent(); - } - // FIXME: Design how to transmit alignment information for every alloca that // is spilled into the coroutine frame and recreate the alignment information // here. Possibly we will need to do a mini SROA here and break the coroutine @@ -148,9 +141,36 @@ void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) { removeTailCallAttribute(Frame, AA); } +bool Lowerer::shouldElide() const { + // If no CoroAllocs, we cannot suppress allocation, so elision is not + // possible. + if (CoroAllocs.empty()) + return false; + + // Check that for every coro.begin there is a coro.destroy directly + // referencing the SSA value of that coro.begin. If the value escaped, then + // coro.destroy would have been referencing a memory location storing that + // value and not the virtual register. + + SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins; + + for (CoroSubFnInst *DA : DestroyAddr) { + if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame())) + ReferencedCoroBegins.insert(CB); + else + return false; + } + + // If size of the set is the same as total number of CoroBegins, means we + // found a coro.free or coro.destroy mentioning a coro.begin and we can + // perform heap elision. + return ReferencedCoroBegins.size() == CoroBegins.size(); +} + bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) { CoroBegins.clear(); CoroAllocs.clear(); + CoroFrees.clear(); ResumeAddr.clear(); DestroyAddr.clear(); @@ -160,6 +180,8 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) { CoroBegins.push_back(CB); else if (auto *CA = dyn_cast<CoroAllocInst>(U)) CoroAllocs.push_back(CA); + else if (auto *CF = dyn_cast<CoroFreeInst>(U)) + CoroFrees.push_back(CF); } // Collect all coro.subfn.addrs associated with coro.begin. 
@@ -191,27 +213,20 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) { replaceWithConstant(ResumeAddrConstant, ResumeAddr); - if (DestroyAddr.empty()) - return true; + bool ShouldElide = shouldElide(); - auto *DestroyAddrConstant = - ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex); + auto *DestroyAddrConstant = ConstantExpr::getExtractValue( + Resumers, + ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex); replaceWithConstant(DestroyAddrConstant, DestroyAddr); - // If there is a coro.alloc that llvm.coro.id refers to, we have the ability - // to suppress dynamic allocation. - if (!CoroAllocs.empty()) { - // FIXME: The check above is overly lax. It only checks for whether we have - // an ability to elide heap allocations, not whether it is safe to do so. - // We need to do something like: - // If for every exit from the function where coro.begin is - // live, there is a coro.free or coro.destroy dominating that exit block, - // then it is safe to elide heap allocation, since the lifetime of coroutine - // is fully enclosed in its caller. + if (ShouldElide) { auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant)); elideHeapAllocations(CoroId->getFunction(), FrameTy, AA); + coro::replaceCoroFree(CoroId, /*Elide=*/true); } + return true; } @@ -262,21 +277,21 @@ struct CoroElide : FunctionPass { Changed = replaceDevirtTrigger(F); L->CoroIds.clear(); - L->CoroFrees.clear(); - // Collect all PostSplit coro.ids and all coro.free. + // Collect all PostSplit coro.ids. for (auto &I : instructions(F)) - if (auto *CF = dyn_cast<CoroFreeInst>(&I)) - L->CoroFrees.push_back(CF); - else if (auto *CII = dyn_cast<CoroIdInst>(&I)) + if (auto *CII = dyn_cast<CoroIdInst>(&I)) if (CII->getInfo().isPostSplit()) - L->CoroIds.push_back(CII); + // If it is the coroutine itself, don't touch it. + if (CII->getCoroutine() != CII->getFunction()) + L->CoroIds.push_back(CII); // If we did not find any coro.id, there is nothing to do. if (L->CoroIds.empty()) return Changed; AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + for (auto *CII : L->CoroIds) Changed |= L->processCoroId(CII, AA); @@ -284,7 +299,6 @@ struct CoroElide : FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AAResultsWrapperPass>(); - AU.setPreservesCFG(); } }; } diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 66002fe16c5..bf1d296f03d 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/circular_raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -182,8 +183,9 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) } // Iterate propagating consumes and kills until they stop changing - int Iteration = 0; (void)Iteration; - + int Iteration = 0; + (void)Iteration; + bool Changed; do { DEBUG(dbgs() << "iteration " << ++Iteration); @@ -307,10 +309,10 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, /*IsVarArgs=*/false); auto *FnPtrTy = FnTy->getPointerTo(); - if (Shape.CoroSuspends.size() > UINT32_MAX) - report_fatal_error("Cannot handle coroutine with this many suspend points"); + // Figure out how wide should be an integer type storing the suspend index. 
+ unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); - SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, Type::getInt32Ty(C)}; + SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, Type::getIntNTy(C, IndexBits)}; Value *CurrentDef = nullptr; // Create an entry for every spilled value. @@ -333,13 +335,6 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, return FrameTy; } -// Returns the index of the last non-spill field in the coroutine frame. -// 2 - if there is no coroutine promise specified or 3, if there is. -static unsigned getLastNonSpillIndex(coro::Shape &Shape) { - // TODO: Add support for coroutine promise. - return 2; -} - // Replace all alloca and SSA values that are accessed across suspend points // with GetElementPointer from coroutine frame + loads and stores. Create an // AllocaSpillBB that will become the new entry block for the resume parts of @@ -373,7 +368,7 @@ static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) { Value *CurrentValue = nullptr; BasicBlock *CurrentBlock = nullptr; Value *CurrentReload = nullptr; - unsigned Index = getLastNonSpillIndex(Shape); + unsigned Index = coro::Shape::LastKnownField; // We need to keep track of any allocas that need "spilling" // since they will live in the coroutine frame now, all access to them @@ -622,6 +617,10 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { // frame. if (isa<CoroBeginInst>(&I)) continue; + // A token returned CoroIdInst is used to tie together structural intrinsics + // in a coroutine. It should not be saved to the coroutine frame. + if (isa<CoroIdInst>(&I)) + continue; for (User *U : I.users()) if (Checker.isDefinitionAcrossSuspend(I, U)) { diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h index a6d81017985..ec60bfb7bea 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -11,7 +11,7 @@ // allows you to do things like: // // if (auto *SF = dyn_cast<CoroSubFnInst>(Inst)) -// ... SF->getFrame() ... +// ... SF->getFrame() ... // // All intrinsic function calls are instances of the call instruction, so these // are all subclasses of the CallInst class. Note that none of these classes @@ -37,6 +37,7 @@ public: RestartTrigger = -1, ResumeIndex, DestroyIndex, + CleanupIndex, IndexLast, IndexFirst = RestartTrigger }; @@ -76,7 +77,8 @@ public: /// This represents the llvm.coro.alloc instruction. class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst { - enum { AlignArg, PromiseArg, InfoArg }; + enum { AlignArg, PromiseArg, CoroutineArg, InfoArg }; + public: // Info argument of coro.id is // fresh out of the frontend: null ; @@ -118,6 +120,16 @@ public: void setInfo(Constant *C) { setArgOperand(InfoArg, C); } + Function *getCoroutine() const { + return cast<Function>(getArgOperand(CoroutineArg)->stripPointerCasts()); + } + void setCoroutineSelf() { + assert(isa<ConstantPointerNull>(getArgOperand(CoroutineArg)) && + "Coroutine argument is already assigned"); + auto *const Int8PtrTy = Type::getInt8PtrTy(getContext()); + setArgOperand(CoroutineArg, + ConstantExpr::getBitCast(getFunction(), Int8PtrTy)); + } // Methods to support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const IntrinsicInst *I) { @@ -142,7 +154,11 @@ public: /// This represents the llvm.coro.free instruction. 
class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst { + enum { IdArg, FrameArg }; + public: + Value *getFrame() const { return getArgOperand(FrameArg); } + // Methods to support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_free; @@ -157,9 +173,7 @@ class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { enum { IdArg, MemArg }; public: - CoroIdInst *getId() const { - return cast<CoroIdInst>(getArgOperand(IdArg)); - } + CoroIdInst *getId() const { return cast<CoroIdInst>(getArgOperand(IdArg)); } Value *getMem() const { return getArgOperand(MemArg); } diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index 161cbe32bb0..8e6cc0c9560 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -46,6 +46,7 @@ namespace coro { bool declaresIntrinsics(Module &M, std::initializer_list<StringRef>); void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement); void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement); +void replaceCoroFree(CoroIdInst *CoroId, bool Elide); void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs, CallGraph &CG, CallGraphSCC &SCC); @@ -71,9 +72,10 @@ struct LLVM_LIBRARY_VISIBILITY Shape { // Field Indexes for known coroutine frame fields. enum { - ResumeField = 0, - DestroyField = 1, - IndexField = 2, + ResumeField, + DestroyField, + IndexField, + LastKnownField = IndexField }; StructType *FrameTy; @@ -82,6 +84,14 @@ struct LLVM_LIBRARY_VISIBILITY Shape { SwitchInst* ResumeSwitch; bool HasFinalSuspend; + IntegerType* getIndexType() const { + assert(FrameTy && "frame type not assigned"); + return cast<IntegerType>(FrameTy->getElementType(IndexField)); + } + ConstantInt *getIndex(uint64_t Value) const { + return ConstantInt::get(getIndexType(), Value); + } + Shape() = default; explicit Shape(Function &F) { buildFrom(F); } void buildFrom(Function &F); diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 93e51d9d597..a03e1eb1109 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -64,7 +64,7 @@ static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) { uint32_t SuspendIndex = 0; for (auto S : Shape.CoroSuspends) { - ConstantInt *IndexVal = Builder.getInt32(SuspendIndex); + ConstantInt *IndexVal = Shape.getIndex(SuspendIndex); // Replace CoroSave with a store to Index: // %index.addr = getelementptr %f.frame... (index field number) @@ -140,7 +140,6 @@ static void replaceFallthroughCoroEnd(IntrinsicInst *End, // resume or cleanup pass for every suspend point. static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, BasicBlock *ResumeEntry, int8_t FnIndex) { - Module *M = F.getParent(); auto *FrameTy = Shape.FrameTy; auto *FnPtrTy = cast<PointerType>(FrameTy->getElementType(0)); @@ -207,7 +206,11 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, OldVFrame->replaceAllUsesWith(NewVFrame); // Replace coro suspend with the appropriate resume index. - auto *NewValue = Builder.getInt8(FnIndex); + // Replacing coro.suspend with (0) will result in control flow proceeding to + // a resume label associated with a suspend point, replacing it with (1) will + // result in control flow proceeding to a cleanup label associated with this + // suspend point. 
+ auto *NewValue = Builder.getInt8(FnIndex ? 1 : 0); for (CoroSuspendInst *CS : Shape.CoroSuspends) { auto *MappedCS = cast<CoroSuspendInst>(VMap[CS]); MappedCS->replaceAllUsesWith(NewValue); @@ -219,11 +222,23 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, // FIXME: coming in upcoming patches: // replaceUnwindCoroEnds(Shape.CoroEnds, VMap); - // Store the address of this clone in the coroutine frame. - Builder.SetInsertPoint(Shape.FramePtr->getNextNode()); - auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr, 0, - FnIndex, "fn.addr"); - Builder.CreateStore(NewF, G); + // We only store resume(0) and destroy(1) addresses in the coroutine frame. + // The cleanup(2) clone is only used during devirtualization when coroutine is + // eligible for heap elision and thus does not participate in indirect calls + // and does not need its address to be stored in the coroutine frame. + if (FnIndex < 2) { + // Store the address of this clone in the coroutine frame. + Builder.SetInsertPoint(Shape.FramePtr->getNextNode()); + auto *G = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, Shape.FramePtr, + 0, FnIndex, "fn.addr"); + Builder.CreateStore(NewF, G); + } + + // Eliminate coro.free from the clones, replacing it with 'null' in cleanup, + // to suppress deallocation code. + coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]), + /*Elide=*/FnIndex == 2); + NewF->setCallingConv(CallingConv::Fast); return NewF; @@ -303,10 +318,12 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { return; buildCoroutineFrame(F, Shape); + replaceFrameSize(Shape); auto *ResumeEntry = createResumeEntryBlock(F, Shape); - auto *ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0); - auto *DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1); + auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0); + auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1); + auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2); // We no longer need coro.end in F. removeCoroEnds(Shape); @@ -314,11 +331,10 @@ static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { postSplitCleanup(F); postSplitCleanup(*ResumeClone); postSplitCleanup(*DestroyClone); + postSplitCleanup(*CleanupClone); - replaceFrameSize(Shape); - - setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone}); - coro::updateCallGraph(F, {ResumeClone, DestroyClone}, CG, SCC); + setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone}); + coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC); } // When we see the coroutine the first time, we insert an indirect call to a diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 1fe9dc44779..daded73d8fb 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -127,6 +127,27 @@ bool coro::declaresIntrinsics(Module &M, return false; } +// Replace all coro.frees associated with the provided CoroId either with 'null' +// if Elide is true and with its frame parameter otherwise. +void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) { + SmallVector<CoroFreeInst *, 4> CoroFrees; + for (User *U : CoroId->users()) + if (auto CF = dyn_cast<CoroFreeInst>(U)) + CoroFrees.push_back(CF); + + if (CoroFrees.empty()) + return; + + Value *Replacement = + Elide ? 
ConstantPointerNull::get(Type::getInt8PtrTy(CoroId->getContext())) + : CoroFrees.front()->getFrame(); + + for (CoroFreeInst *CF : CoroFrees) { + CF->replaceAllUsesWith(Replacement); + CF->eraseFromParent(); + } +} + // FIXME: This code is stolen from CallGraph::addToCallGraph(Function *F), which // happens to be private. It is better for this functionality exposed by the // CallGraph. @@ -286,5 +307,5 @@ void coro::Shape::buildFrom(Function &F) { // Canonicalize coro.suspend by inserting a coro.save if needed. for (CoroSuspendInst *CS : CoroSuspends) if (!CS->getCoroSave()) - createCoroSave(CoroBegin, CoroSuspends.back()); + createCoroSave(CoroBegin, CS); } |