diff options
author | Gor Nishanov <GorNishanov@gmail.com> | 2016-08-10 16:40:39 +0000 |
---|---|---|
committer | Gor Nishanov <GorNishanov@gmail.com> | 2016-08-10 16:40:39 +0000 |
commit | b2a9c0252179d9334967266182bad77c1d3e6579 (patch) | |
tree | bdb018dd1e57587914081302fa50b38020038a64 /llvm/lib/Transforms/Coroutines | |
parent | 17586582e7cd22cbfe343dc7321c746046912423 (diff) | |
download | bcm5719-llvm-b2a9c0252179d9334967266182bad77c1d3e6579.tar.gz bcm5719-llvm-b2a9c0252179d9334967266182bad77c1d3e6579.zip |
[Coroutines] Part 6: Elide dynamic allocation of a coroutine frame when possible
Summary:
A particular coroutine usage pattern, where a coroutine is created, manipulated and
destroyed by the same calling function, is common for coroutines implementing
RAII idiom and is suitable for allocation elision optimization which avoid
dynamic allocation by storing the coroutine frame as a static `alloca` in its
caller.
coro.free and coro.alloc intrinsics are used to indicate which code needs to be suppressed
when dynamic allocation elision happens:
```
entry:
%elide = call i8* @llvm.coro.alloc()
%need.dyn.alloc = icmp ne i8* %elide, null
br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc
dyn.alloc:
%alloc = call i8* @CustomAlloc(i32 4)
br label %coro.begin
coro.begin:
%phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ]
%hdl = call i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null,
i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
```
and
```
%mem = call i8* @llvm.coro.free(i8* %hdl)
%need.dyn.free = icmp ne i8* %mem, null
br i1 %need.dyn.free, label %dyn.free, label %if.end
dyn.free:
call void @CustomFree(i8* %mem)
br label %if.end
if.end:
...
```
If heap allocation elision is performed, we replace coro.alloc with a static alloca on the caller frame and coro.free with null constant.
Also, we need to make sure that if there are any tail calls referencing the coroutine frame, we need to remote tail call attribute, since now coroutine frame lives on the stack.
Documentation and overview is here: http://llvm.org/docs/Coroutines.html.
Upstreaming sequence (rough plan)
1.Add documentation. (https://reviews.llvm.org/D22603)
2.Add coroutine intrinsics. (https://reviews.llvm.org/D22659)
3.Add empty coroutine passes. (https://reviews.llvm.org/D22847)
4.Add coroutine devirtualization + tests.
ab) Lower coro.resume and coro.destroy (https://reviews.llvm.org/D22998)
c) Do devirtualization (https://reviews.llvm.org/D23229)
5.Add CGSCC restart trigger + tests. (https://reviews.llvm.org/D23234)
6.Add coroutine heap elision + tests. <= we are here
7.Add the rest of the logic (split into more patches)
Reviewers: mehdi_amini, majnemer
Subscribers: mehdi_amini, llvm-commits
Differential Revision: https://reviews.llvm.org/D23245
llvm-svn: 278242
Diffstat (limited to 'llvm/lib/Transforms/Coroutines')
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroElide.cpp | 158 | ||||
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroInstr.h | 64 | ||||
-rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroInternal.h | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Coroutines/Coroutines.cpp | 18 |
4 files changed, 213 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index 99394422ee1..c4568119429 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/InstIterator.h" #include "llvm/Pass.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -39,11 +40,29 @@ struct CoroElide : FunctionPass { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); AU.setPreservesCFG(); } }; } +char CoroElide::ID = 0; +INITIALIZE_PASS_BEGIN( + CoroElide, "coro-elide", + "Coroutine frame allocation elision and indirect calls replacement", false, + false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END( + CoroElide, "coro-elide", + "Coroutine frame allocation elision and indirect calls replacement", false, + false) + +Pass *llvm::createCoroElidePass() { return new CoroElide(); } + +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// + // Go through the list of coro.subfn.addr intrinsics and replace them with the // provided constant. static void replaceWithConstant(Constant *Value, @@ -68,24 +87,103 @@ static void replaceWithConstant(Constant *Value, replaceAndRecursivelySimplify(I, Value); } +// See if any operand of the call instruction references the coroutine frame. +static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) { + for (Value *Op : CI->operand_values()) + if (AA.alias(Op, Frame) != NoAlias) + return true; + return false; +} + +// Look for any tail calls referencing the coroutine frame and remove tail +// attribute from them, since now coroutine frame resides on the stack and tail +// call implies that the function does not references anything on the stack. +static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) { + Function &F = *Frame->getFunction(); + MemoryLocation Mem(Frame); + for (Instruction &I : instructions(F)) + if (auto *Call = dyn_cast<CallInst>(&I)) + if (Call->isTailCall() && operandReferences(Call, Frame, AA)) { + // FIXME: If we ever hit this check. Evaluate whether it is more + // appropriate to retain musttail and allow the code to compile. + if (Call->isMustTailCall()) + report_fatal_error("Call referring to the coroutine frame cannot be " + "marked as musttail"); + Call->setTailCall(false); + } +} + +// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type. +static Type *getFrameType(Function *Resume) { + auto *ArgType = Resume->getArgumentList().front().getType(); + return cast<PointerType>(ArgType)->getElementType(); +} + +// Finds first non alloca instruction in the entry block of a function. +static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) { + for (Instruction &I : F->getEntryBlock()) + if (!isa<AllocaInst>(&I)) + return &I; + llvm_unreachable("no terminator in the entry block"); +} + +// To elide heap allocations we need to suppress code blocks guarded by +// llvm.coro.alloc and llvm.coro.free instructions. +static void elideHeapAllocations(CoroBeginInst *CoroBegin, Type *FrameTy, + CoroAllocInst *AllocInst, AAResults &AA) { + LLVMContext &C = CoroBegin->getContext(); + auto *InsertPt = getFirstNonAllocaInTheEntryBlock(CoroBegin->getFunction()); + + // FIXME: Design how to transmit alignment information for every alloca that + // is spilled into the coroutine frame and recreate the alignment information + // here. Possibly we will need to do a mini SROA here and break the coroutine + // frame into individual AllocaInst recreating the original alignment. + auto *Frame = new AllocaInst(FrameTy, "", InsertPt); + auto *FrameVoidPtr = + new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt); + + // Replacing llvm.coro.alloc with non-null value will suppress dynamic + // allocation as it is expected for the frontend to generate the code that + // looks like: + // mem = coro.alloc(); + // if (!mem) mem = malloc(coro.size()); + // coro.begin(mem, ...) + AllocInst->replaceAllUsesWith(FrameVoidPtr); + AllocInst->eraseFromParent(); + + // To suppress deallocation code, we replace all llvm.coro.free intrinsics + // associated with this coro.begin with null constant. + auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C)); + coro::replaceAllCoroFrees(CoroBegin, NullPtr); + CoroBegin->lowerTo(FrameVoidPtr); + + // Since now coroutine frame lives on the stack we need to make sure that + // any tail call referencing it, must be made non-tail call. + removeTailCallAttribute(Frame, AA); +} + // See if there are any coro.subfn.addr intrinsics directly referencing // the coro.begin. If found, replace them with an appropriate coroutine // subfunction associated with that coro.begin. -static bool replaceIndirectCalls(CoroBeginInst *CoroBegin) { +static bool replaceIndirectCalls(CoroBeginInst *CoroBegin, AAResults &AA) { SmallVector<CoroSubFnInst *, 8> ResumeAddr; SmallVector<CoroSubFnInst *, 8> DestroyAddr; - for (User *U : CoroBegin->users()) { - if (auto *II = dyn_cast<CoroSubFnInst>(U)) { - switch (II->getIndex()) { - case CoroSubFnInst::ResumeIndex: - ResumeAddr.push_back(II); - break; - case CoroSubFnInst::DestroyIndex: - DestroyAddr.push_back(II); - break; - default: - llvm_unreachable("unexpected coro.subfn.addr constant"); + for (User *CF : CoroBegin->users()) { + assert(isa<CoroFrameInst>(CF) && + "CoroBegin can be only used by coro.frame instructions"); + for (User *U : CF->users()) { + if (auto *II = dyn_cast<CoroSubFnInst>(U)) { + switch (II->getIndex()) { + case CoroSubFnInst::ResumeIndex: + ResumeAddr.push_back(II); + break; + case CoroSubFnInst::DestroyIndex: + DestroyAddr.push_back(II); + break; + default: + llvm_unreachable("unexpected coro.subfn.addr constant"); + } } } } @@ -99,11 +197,28 @@ static bool replaceIndirectCalls(CoroBeginInst *CoroBegin) { "of coroutine subfunctions"); auto *ResumeAddrConstant = ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex); + replaceWithConstant(ResumeAddrConstant, ResumeAddr); + + if (DestroyAddr.empty()) + return true; + auto *DestroyAddrConstant = ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex); - - replaceWithConstant(ResumeAddrConstant, ResumeAddr); replaceWithConstant(DestroyAddrConstant, DestroyAddr); + + // If llvm.coro.begin refers to llvm.coro.alloc, we can elide the allocation. + if (auto *AllocInst = CoroBegin->getAlloc()) { + // FIXME: The check above is overly lax. It only checks for whether we have + // an ability to elide heap allocations, not whether it is safe to do so. + // We need to do something like: + // If for every exit from the function where coro.begin is + // live, there is a coro.free or coro.destroy dominating that exit block, + // then it is safe to elide heap allocation, since the lifetime of coroutine + // is fully enclosed in its caller. + auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant)); + elideHeapAllocations(CoroBegin, FrameTy, AllocInst, AA); + } + return true; } @@ -143,20 +258,9 @@ bool CoroElide::runOnFunction(Function &F) { if (CoroBegins.empty()) return Changed; + AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); for (auto *CB : CoroBegins) - Changed |= replaceIndirectCalls(CB); + Changed |= replaceIndirectCalls(CB, AA); return Changed; } - -char CoroElide::ID = 0; -INITIALIZE_PASS_BEGIN( - CoroElide, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) -INITIALIZE_PASS_END( - CoroElide, "coro-elide", - "Coroutine frame allocation elision and indirect calls replacement", false, - false) - -Pass *llvm::createCoroElidePass() { return new CoroElide(); } diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h index d3470e1de41..1aa1f493edb 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInstr.h +++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h @@ -62,11 +62,57 @@ public: } }; +/// This represents the llvm.coro.alloc instruction. +class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_alloc; + } + static inline bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + +/// This represents the llvm.coro.frame instruction. +class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_frame; + } + static inline bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + +/// This represents the llvm.coro.free instruction. +class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst { +public: + // Methods to support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::coro_free; + } + static inline bool classof(const Value *V) { + return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V)); + } +}; + /// This class represents the llvm.coro.begin instruction. class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst { - enum { MemArg, AlignArg, PromiseArg, InfoArg }; + enum { MemArg, ElideArg, AlignArg, PromiseArg, InfoArg }; public: + CoroAllocInst *getAlloc() const { + if (auto *CAI = dyn_cast<CoroAllocInst>( + getArgOperand(ElideArg)->stripPointerCasts())) + return CAI; + + return nullptr; + } + + Value *getMem() const { return getArgOperand(MemArg); } + Constant *getRawInfo() const { return cast<Constant>(getArgOperand(InfoArg)->stripPointerCasts()); } @@ -108,6 +154,22 @@ public: return Result; } + // Replaces all coro.frame intrinsics that are associated with this coro.begin + // to a replacement value and removes coro.begin and all of the coro.frame + // intrinsics. + void lowerTo(Value* Replacement) { + SmallVector<CoroFrameInst*, 4> FrameInsts; + for (auto *CF : this->users()) + FrameInsts.push_back(cast<CoroFrameInst>(CF)); + + for (auto *CF : FrameInsts) { + CF->replaceAllUsesWith(Replacement); + CF->eraseFromParent(); + } + + this->eraseFromParent(); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::coro_begin; diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index e2ffe79867a..a057597194d 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -42,6 +42,7 @@ void initializeCoroCleanupPass(PassRegistry &); namespace coro { bool declaresIntrinsics(Module &M, std::initializer_list<StringRef>); +void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement); // Keeps data and helper functions for lowering coroutine intrinsics. struct LowererBase { diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index f9661f3575c..569ee9f2f40 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -122,3 +122,21 @@ bool coro::declaresIntrinsics(Module &M, return false; } + +// Find all llvm.coro.free instructions associated with the provided coro.begin +// and replace them with the provided replacement value. +void coro::replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement) { + SmallVector<CoroFreeInst *, 4> CoroFrees; + for (User *FramePtr: CB->users()) + for (User *U : FramePtr->users()) + if (auto *CF = dyn_cast<CoroFreeInst>(U)) + CoroFrees.push_back(CF); + + if (CoroFrees.empty()) + return; + + for (CoroFreeInst *CF : CoroFrees) { + CF->replaceAllUsesWith(Replacement); + CF->eraseFromParent(); + } +} |