summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/docs/Coroutines.rst73
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td10
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroElide.cpp158
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroInstr.h64
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroInternal.h1
-rw-r--r--llvm/lib/Transforms/Coroutines/Coroutines.cpp18
-rw-r--r--llvm/test/Transforms/Coroutines/coro-elide.ll15
-rw-r--r--llvm/test/Transforms/Coroutines/coro-heap-elide.ll125
-rw-r--r--llvm/test/Transforms/Coroutines/restart-trigger.ll16
9 files changed, 410 insertions, 70 deletions
diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst
index a54c18a3537..7a12641babf 100644
--- a/llvm/docs/Coroutines.rst
+++ b/llvm/docs/Coroutines.rst
@@ -95,7 +95,8 @@ The LLVM IR for this coroutine looks like this:
entry:
%size = call i32 @llvm.coro.size.i32()
%alloc = call i8* @malloc(i32 %size)
- %hdl = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* null, i8* null)
+ %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, i8* null)
+ %hdl = call noalias i8* @llvm.coro.frame(token %beg)
br label %loop
loop:
%n.val = phi i32 [ %n, %entry ], [ %inc, %loop ]
@@ -115,9 +116,10 @@ The LLVM IR for this coroutine looks like this:
The `entry` block establishes the coroutine frame. The `coro.size`_ intrinsic is
lowered to a constant representing the size required for the coroutine frame.
-The `coro.begin`_ intrinsic initializes the coroutine frame and returns the
-coroutine handle. The first parameter of `coro.begin` is given a block of memory
-to be used if the coroutine frame needs to be allocated dynamically.
+The `coro.begin`_ intrinsic initializes the coroutine frame and returns the a
+token that is used to obtain the coroutine handle via `coro.frame` intrinsic.
+The first parameter of `coro.begin` is given a block of memory to be used if the
+coroutine frame needs to be allocated dynamically.
The `cleanup` block destroys the coroutine frame. The `coro.free`_ intrinsic,
given the coroutine handle, returns a pointer of the memory block to be freed or
@@ -160,12 +162,13 @@ After resume and destroy parts are outlined, function `f` will contain only the
code responsible for creation and initialization of the coroutine frame and
execution of the coroutine until a suspend point is reached:
-.. code-block:: llvm
+.. code-block:: none
define i8* @f(i32 %n) {
entry:
%alloc = call noalias i8* @malloc(i32 24)
- %0 = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* null, i8* null)
+ %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null, i8* null)
+ %0 = call i8* @llvm.coro.frame(token %beg)
%frame = bitcast i8* %0 to %f.frame*
%1 = getelementptr %f.frame, %f.frame* %frame, i32 0, i32 0
store void (%f.frame*)* @f.resume, void (%f.frame*)** %1
@@ -219,7 +222,7 @@ In the entry block, we will call `coro.alloc`_ intrinsic that will return `null`
when dynamic allocation is required, and an address of an alloca on the caller's
frame where coroutine frame can be stored if dynamic allocation is elided.
-.. code-block:: llvm
+.. code-block:: none
entry:
%elide = call i8* @llvm.coro.alloc()
@@ -231,7 +234,7 @@ frame where coroutine frame can be stored if dynamic allocation is elided.
br label %coro.begin
coro.begin:
%phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ]
- %hdl = call noalias i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null, i8* null)
+ %beg = call token @llvm.coro.begin(i8* %phi, i8* null, i32 0, i8* null, i8* null)
In the cleanup block, we will make freeing the coroutine frame conditional on
`coro.free`_ intrinsic. If allocation is elided, `coro.free`_ returns `null`
@@ -421,7 +424,8 @@ store the current value produced by a coroutine.
br label %coro.begin
coro.begin:
%phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ]
- %hdl = call noalias i8* @llvm.coro.begin(i8* %phi, i32 0, i8* %pv, i8* null)
+ %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* %pv, i8* null)
+ %hdl = call i8* @llvm.coro.frame(token %beg)
br label %loop
loop:
%n.val = phi i32 [ %n, %coro.begin ], [ %inc, %loop ]
@@ -687,15 +691,16 @@ a coroutine user are responsible to makes sure there is no data races.
Example:
""""""""
-.. code-block:: llvm
+.. code-block:: text
define i8* @f(i32 %n) {
entry:
%promise = alloca i32
%pv = bitcast i32* %promise to i8*
...
- ; the third argument to coro.begin points to the coroutine promise.
- %hdl = call noalias i8* @llvm.coro.begin(i8* %alloc, i32 0, i8* %pv, i8* null)
+ ; the fourth argument to coro.begin points to the coroutine promise.
+ %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* %pv, i8* null)
+ %hdl = call noalias i8* @llvm.coro.frame(token %beg)
...
store i32 42, i32* %promise ; store something into the promise
...
@@ -752,12 +757,14 @@ the coroutine frame.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
- declare i8* @llvm.coro.begin(i8* <mem>, i32 <align>, i8* <promise>, i8* <fnaddr>)
+ declare i8* @llvm.coro.begin(i8* <mem>, i8* <elide>, i32 <align>, i8* <promise>, i8* <fnaddr>)
Overview:
"""""""""
-The '``llvm.coro.begin``' intrinsic returns an address of the coroutine frame.
+The '``llvm.coro.begin``' intrinsic captures coroutine initialization
+information and returns a token that can be used by `coro.frame` intrinsic to
+return an address of the coroutine frame.
Arguments:
""""""""""
@@ -765,15 +772,17 @@ Arguments:
The first argument is a pointer to a block of memory where coroutine frame
will be stored.
-The second argument provides information on the alignment of the memory returned
+The second argument is either null or an SSA value of `coro.alloc` intrinsic.
+
+The third argument provides information on the alignment of the memory returned
by the allocation function and given to `coro.begin` by the first argument. If
this argument is 0, the memory is assumed to be aligned to 2 * sizeof(i8*).
This argument only accepts constants.
-The third argument, if not `null`, designates a particular alloca instruction to
+The fourth argument, if not `null`, designates a particular alloca instruction to
be a `coroutine promise`_.
-The fourth argument is `null` before coroutine is split, and later is replaced
+The fifth argument is `null` before coroutine is split, and later is replaced
to point to a private global constant array containing function pointers to
outlined resume and destroy parts of the coroutine.
@@ -781,10 +790,10 @@ Semantics:
""""""""""
Depending on the alignment requirements of the objects in the coroutine frame
-and/or on the codegen compactness reasons the pointer returned from `coro.begin`
-may be at offset to the `%mem` argument. (This could be beneficial if
-instructions that express relative access to data can be more compactly encoded
-with small positive and negative offsets).
+and/or on the codegen compactness reasons the pointer returned from `coro.frame`
+associated with a particular `coro.begin` may be at offset to the `%mem`
+argument. (This could be beneficial if instructions that express relative access
+to data can be more compactly encoded with small positive and negative offsets).
A frontend should emit exactly one `coro.begin` intrinsic per coroutine.
@@ -807,7 +816,7 @@ Arguments:
""""""""""
A pointer to the coroutine frame. This should be the same pointer that was
-returned by prior `coro.begin` call.
+returned by prior `coro.frame` call.
Example (custom deallocation function):
"""""""""""""""""""""""""""""""""""""""
@@ -862,10 +871,13 @@ alloca storing the coroutine frame. Otherwise, it is lowered to constant `null`.
A frontend should emit at most one `coro.alloc` intrinsic per coroutine.
+If `coro.alloc` is present, the second parameter to `coro.begin` should refer
+to it.
+
Example:
""""""""
-.. code-block:: llvm
+.. code-block:: text
entry:
%elide = call i8* @llvm.coro.alloc()
@@ -879,7 +891,8 @@ Example:
coro.begin:
%phi = phi i8* [ %elide, %entry ], [ %alloc, %coro.alloc ]
- %frame = call i8* @llvm.coro.begin(i8* %phi, i32 0, i8* null, i8* null)
+ %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null, i8* null)
+ %frame = call i8* @llvm.coro.frame(token %beg)
.. _coro.frame:
@@ -898,14 +911,12 @@ the enclosing coroutine.
Arguments:
""""""""""
-None
+A token that refers to `coro.begin` instruction.
Semantics:
""""""""""
-This intrinsic is lowered to refer to the `coro.begin`_ instruction. This is
-a frontend convenience intrinsic that makes it easier to refer to the
-coroutine frame.
+This intrinsic is lowered to refer to address of the coroutine frame.
.. _coro.end:
@@ -1164,7 +1175,7 @@ CoroElide
---------
The pass CoroElide examines if the inlined coroutine is eligible for heap
allocation elision optimization. If so, it replaces `coro.alloc` and
-`coro.begin` intrinsic with an address of a coroutine frame placed on its caller
+`coro.frame` intrinsic with an address of a coroutine frame placed on its caller
and replaces `coro.free` intrinsics with `null` to remove the deallocation code.
This pass also replaces `coro.resume` and `coro.destroy` intrinsics with direct
calls to resume and destroy functions for a particular coroutine where possible.
@@ -1178,11 +1189,11 @@ Upstreaming sequence (rough plan)
=================================
#. Add documentation.
#. Add coroutine intrinsics.
-#. Add empty coroutine passes. <== we are here
+#. Add empty coroutine passes.
#. Add coroutine devirtualization + tests.
#. Add CGSCC restart trigger + tests.
#. Add coroutine heap elision + tests.
-#. Add custom allocation heap elision + tests.
+#. Add custom allocation heap elision + tests. <== we are here
#. Add coroutine splitting logic + tests.
#. Add simple coroutine frame builder + tests.
#. Add the rest of the logic + tests. (Maybe split further as needed).
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b39b3fb83d7..3dcbee9f250 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -603,16 +603,16 @@ def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
// Coroutine Structure Intrinsics.
def int_coro_alloc : Intrinsic<[llvm_ptr_ty], [], []>;
-def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty,
- llvm_ptr_ty, llvm_ptr_ty],
- [WriteOnly<0>, ReadNone<2>, ReadOnly<3>,
- NoCapture<3>]>;
+def int_coro_begin : Intrinsic<[llvm_token_ty], [llvm_ptr_ty, llvm_ptr_ty,
+ llvm_i32_ty, llvm_ptr_ty, llvm_ptr_ty],
+ [WriteOnly<0>, WriteOnly<0>,
+ ReadNone<3>, ReadOnly<4>, NoCapture<4>]>;
def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
[IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>;
def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty], []>;
-def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+def int_coro_frame : Intrinsic<[llvm_ptr_ty], [llvm_token_ty], [IntrNoMem]>;
def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index 99394422ee1..c4568119429 100644
--- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -39,11 +40,29 @@ struct CoroElide : FunctionPass {
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
AU.setPreservesCFG();
}
};
}
+char CoroElide::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ CoroElide, "coro-elide",
+ "Coroutine frame allocation elision and indirect calls replacement", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(
+ CoroElide, "coro-elide",
+ "Coroutine frame allocation elision and indirect calls replacement", false,
+ false)
+
+Pass *llvm::createCoroElidePass() { return new CoroElide(); }
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
// Go through the list of coro.subfn.addr intrinsics and replace them with the
// provided constant.
static void replaceWithConstant(Constant *Value,
@@ -68,24 +87,103 @@ static void replaceWithConstant(Constant *Value,
replaceAndRecursivelySimplify(I, Value);
}
+// See if any operand of the call instruction references the coroutine frame.
+static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) {
+ for (Value *Op : CI->operand_values())
+ if (AA.alias(Op, Frame) != NoAlias)
+ return true;
+ return false;
+}
+
+// Look for any tail calls referencing the coroutine frame and remove tail
+// attribute from them, since now coroutine frame resides on the stack and tail
+// call implies that the function does not references anything on the stack.
+static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
+ Function &F = *Frame->getFunction();
+ MemoryLocation Mem(Frame);
+ for (Instruction &I : instructions(F))
+ if (auto *Call = dyn_cast<CallInst>(&I))
+ if (Call->isTailCall() && operandReferences(Call, Frame, AA)) {
+ // FIXME: If we ever hit this check. Evaluate whether it is more
+ // appropriate to retain musttail and allow the code to compile.
+ if (Call->isMustTailCall())
+ report_fatal_error("Call referring to the coroutine frame cannot be "
+ "marked as musttail");
+ Call->setTailCall(false);
+ }
+}
+
+// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type.
+static Type *getFrameType(Function *Resume) {
+ auto *ArgType = Resume->getArgumentList().front().getType();
+ return cast<PointerType>(ArgType)->getElementType();
+}
+
+// Finds first non alloca instruction in the entry block of a function.
+static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
+ for (Instruction &I : F->getEntryBlock())
+ if (!isa<AllocaInst>(&I))
+ return &I;
+ llvm_unreachable("no terminator in the entry block");
+}
+
+// To elide heap allocations we need to suppress code blocks guarded by
+// llvm.coro.alloc and llvm.coro.free instructions.
+static void elideHeapAllocations(CoroBeginInst *CoroBegin, Type *FrameTy,
+ CoroAllocInst *AllocInst, AAResults &AA) {
+ LLVMContext &C = CoroBegin->getContext();
+ auto *InsertPt = getFirstNonAllocaInTheEntryBlock(CoroBegin->getFunction());
+
+ // FIXME: Design how to transmit alignment information for every alloca that
+ // is spilled into the coroutine frame and recreate the alignment information
+ // here. Possibly we will need to do a mini SROA here and break the coroutine
+ // frame into individual AllocaInst recreating the original alignment.
+ auto *Frame = new AllocaInst(FrameTy, "", InsertPt);
+ auto *FrameVoidPtr =
+ new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt);
+
+ // Replacing llvm.coro.alloc with non-null value will suppress dynamic
+ // allocation as it is expected for the frontend to generate the code that
+ // looks like:
+ // mem = coro.alloc();
+ // if (!mem) mem = malloc(coro.size());
+ // coro.begin(mem, ...)
+ AllocInst->replaceAllUsesWith(FrameVoidPtr);
+ AllocInst->eraseFromParent();
+
+ // To suppress deallocation code, we replace all llvm.coro.free intrinsics
+ // associated with this coro.begin with null constant.
+ auto *NullPtr = ConstantPointerNull::get(Type::getInt8PtrTy(C));
+ coro::replaceAllCoroFrees(CoroBegin, NullPtr);
+ CoroBegin->lowerTo(FrameVoidPtr);
+
+ // Since now coroutine frame lives on the stack we need to make sure that
+ // any tail call referencing it, must be made non-tail call.
+ removeTailCallAttribute(Frame, AA);
+}
+
// See if there are any coro.subfn.addr intrinsics directly referencing
// the coro.begin. If found, replace them with an appropriate coroutine
// subfunction associated with that coro.begin.
-static bool replaceIndirectCalls(CoroBeginInst *CoroBegin) {
+static bool replaceIndirectCalls(CoroBeginInst *CoroBegin, AAResults &AA) {
SmallVector<CoroSubFnInst *, 8> ResumeAddr;
SmallVector<CoroSubFnInst *, 8> DestroyAddr;
- for (User *U : CoroBegin->users()) {
- if (auto *II = dyn_cast<CoroSubFnInst>(U)) {
- switch (II->getIndex()) {
- case CoroSubFnInst::ResumeIndex:
- ResumeAddr.push_back(II);
- break;
- case CoroSubFnInst::DestroyIndex:
- DestroyAddr.push_back(II);
- break;
- default:
- llvm_unreachable("unexpected coro.subfn.addr constant");
+ for (User *CF : CoroBegin->users()) {
+ assert(isa<CoroFrameInst>(CF) &&
+ "CoroBegin can be only used by coro.frame instructions");
+ for (User *U : CF->users()) {
+ if (auto *II = dyn_cast<CoroSubFnInst>(U)) {
+ switch (II->getIndex()) {
+ case CoroSubFnInst::ResumeIndex:
+ ResumeAddr.push_back(II);
+ break;
+ case CoroSubFnInst::DestroyIndex:
+ DestroyAddr.push_back(II);
+ break;
+ default:
+ llvm_unreachable("unexpected coro.subfn.addr constant");
+ }
}
}
}
@@ -99,11 +197,28 @@ static bool replaceIndirectCalls(CoroBeginInst *CoroBegin) {
"of coroutine subfunctions");
auto *ResumeAddrConstant =
ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex);
+ replaceWithConstant(ResumeAddrConstant, ResumeAddr);
+
+ if (DestroyAddr.empty())
+ return true;
+
auto *DestroyAddrConstant =
ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::DestroyIndex);
-
- replaceWithConstant(ResumeAddrConstant, ResumeAddr);
replaceWithConstant(DestroyAddrConstant, DestroyAddr);
+
+ // If llvm.coro.begin refers to llvm.coro.alloc, we can elide the allocation.
+ if (auto *AllocInst = CoroBegin->getAlloc()) {
+ // FIXME: The check above is overly lax. It only checks for whether we have
+ // an ability to elide heap allocations, not whether it is safe to do so.
+ // We need to do something like:
+ // If for every exit from the function where coro.begin is
+ // live, there is a coro.free or coro.destroy dominating that exit block,
+ // then it is safe to elide heap allocation, since the lifetime of coroutine
+ // is fully enclosed in its caller.
+ auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant));
+ elideHeapAllocations(CoroBegin, FrameTy, AllocInst, AA);
+ }
+
return true;
}
@@ -143,20 +258,9 @@ bool CoroElide::runOnFunction(Function &F) {
if (CoroBegins.empty())
return Changed;
+ AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
for (auto *CB : CoroBegins)
- Changed |= replaceIndirectCalls(CB);
+ Changed |= replaceIndirectCalls(CB, AA);
return Changed;
}
-
-char CoroElide::ID = 0;
-INITIALIZE_PASS_BEGIN(
- CoroElide, "coro-elide",
- "Coroutine frame allocation elision and indirect calls replacement", false,
- false)
-INITIALIZE_PASS_END(
- CoroElide, "coro-elide",
- "Coroutine frame allocation elision and indirect calls replacement", false,
- false)
-
-Pass *llvm::createCoroElidePass() { return new CoroElide(); }
diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
index d3470e1de41..1aa1f493edb 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -62,11 +62,57 @@ public:
}
};
+/// This represents the llvm.coro.alloc instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_alloc;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.frame instruction.
+class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_frame;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.free instruction.
+class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_free;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This class represents the llvm.coro.begin instruction.
class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst {
- enum { MemArg, AlignArg, PromiseArg, InfoArg };
+ enum { MemArg, ElideArg, AlignArg, PromiseArg, InfoArg };
public:
+ CoroAllocInst *getAlloc() const {
+ if (auto *CAI = dyn_cast<CoroAllocInst>(
+ getArgOperand(ElideArg)->stripPointerCasts()))
+ return CAI;
+
+ return nullptr;
+ }
+
+ Value *getMem() const { return getArgOperand(MemArg); }
+
Constant *getRawInfo() const {
return cast<Constant>(getArgOperand(InfoArg)->stripPointerCasts());
}
@@ -108,6 +154,22 @@ public:
return Result;
}
+ // Replaces all coro.frame intrinsics that are associated with this coro.begin
+ // to a replacement value and removes coro.begin and all of the coro.frame
+ // intrinsics.
+ void lowerTo(Value* Replacement) {
+ SmallVector<CoroFrameInst*, 4> FrameInsts;
+ for (auto *CF : this->users())
+ FrameInsts.push_back(cast<CoroFrameInst>(CF));
+
+ for (auto *CF : FrameInsts) {
+ CF->replaceAllUsesWith(Replacement);
+ CF->eraseFromParent();
+ }
+
+ this->eraseFromParent();
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_begin;
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index e2ffe79867a..a057597194d 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -42,6 +42,7 @@ void initializeCoroCleanupPass(PassRegistry &);
namespace coro {
bool declaresIntrinsics(Module &M, std::initializer_list<StringRef>);
+void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement);
// Keeps data and helper functions for lowering coroutine intrinsics.
struct LowererBase {
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index f9661f3575c..569ee9f2f40 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -122,3 +122,21 @@ bool coro::declaresIntrinsics(Module &M,
return false;
}
+
+// Find all llvm.coro.free instructions associated with the provided coro.begin
+// and replace them with the provided replacement value.
+void coro::replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement) {
+ SmallVector<CoroFreeInst *, 4> CoroFrees;
+ for (User *FramePtr: CB->users())
+ for (User *U : FramePtr->users())
+ if (auto *CF = dyn_cast<CoroFreeInst>(U))
+ CoroFrees.push_back(CF);
+
+ if (CoroFrees.empty())
+ return;
+
+ for (CoroFreeInst *CF : CoroFrees) {
+ CF->replaceAllUsesWith(Replacement);
+ CF->eraseFromParent();
+ }
+}
diff --git a/llvm/test/Transforms/Coroutines/coro-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide.ll
index ade5f4d8618..5244b74a21a 100644
--- a/llvm/test/Transforms/Coroutines/coro-elide.ll
+++ b/llvm/test/Transforms/Coroutines/coro-elide.ll
@@ -1,6 +1,6 @@
; Tests that the coro.destroy and coro.resume are devirtualized where possible,
; SCC pipeline restarts and inlines the direct calls.
-; RUN: opt < %s -S -inline -coro-elide | FileCheck %s
+; RUN: opt < %s -S -inline -coro-elide -dce | FileCheck %s
declare void @print(i32) nounwind
@@ -22,15 +22,16 @@ define fastcc void @f.destroy(i8*) {
; a coroutine start function
define i8* @f() {
entry:
- %hdl = call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
+ %tok = call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null,
i8* bitcast ([2 x void (i8*)*]* @f.resumers to i8*))
+ %hdl = call i8* @llvm.coro.frame(token %tok)
ret i8* %hdl
}
; CHECK-LABEL: @callResume(
define void @callResume() {
entry:
-; CHECK: call i8* @llvm.coro.begin
+; CHECK: call token @llvm.coro.begin
%hdl = call i8* @f()
; CHECK-NEXT: call void @print(i32 0)
@@ -50,7 +51,7 @@ entry:
; CHECK-LABEL: @eh(
define void @eh() personality i8* null {
entry:
-; CHECK: call i8* @llvm.coro.begin
+; CHECK: call token @llvm.coro.begin
%hdl = call i8* @f()
; CHECK-NEXT: call void @print(i32 0)
@@ -70,7 +71,8 @@ ehcleanup:
; no devirtualization here, since coro.begin info parameter is null
define void @no_devirt_info_null() {
entry:
- %hdl = call i8* @llvm.coro.begin(i8* null, i32 0, i8* null, i8* null)
+ %tok = call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* null)
+ %hdl = call i8* @llvm.coro.frame(token %tok)
; CHECK: call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
%0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
@@ -106,5 +108,6 @@ entry:
}
-declare i8* @llvm.coro.begin(i8*, i32, i8*, i8*)
+declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*)
+declare i8* @llvm.coro.frame(token)
declare i8* @llvm.coro.subfn.addr(i8*, i8)
diff --git a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll
new file mode 100644
index 00000000000..7f9db05acc2
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll
@@ -0,0 +1,125 @@
+; Tests that the dynamic allocation and deallocation of the coroutine frame is
+; elided and any tail calls referencing the coroutine frame has the tail
+; call attribute removed.
+; RUN: opt < %s -S -inline -coro-elide -instsimplify -simplifycfg | FileCheck %s
+
+declare void @print(i32) nounwind
+
+%f.frame = type {i32}
+
+declare void @bar(i8*)
+
+declare fastcc void @f.resume(%f.frame*)
+declare fastcc void @f.destroy(%f.frame*)
+
+declare void @may_throw()
+declare i8* @CustomAlloc(i32)
+declare void @CustomFree(i8*)
+
+@f.resumers = internal constant
+ [2 x void (%f.frame*)*] [void (%f.frame*)* @f.resume, void (%f.frame*)* @f.destroy]
+
+; a coroutine start function
+define i8* @f() personality i8* null {
+entry:
+ %elide = call i8* @llvm.coro.alloc()
+ %need.dyn.alloc = icmp ne i8* %elide, null
+ br i1 %need.dyn.alloc, label %coro.begin, label %dyn.alloc
+dyn.alloc:
+ %alloc = call i8* @CustomAlloc(i32 4)
+ br label %coro.begin
+coro.begin:
+ %phi = phi i8* [ %elide, %entry ], [ %alloc, %dyn.alloc ]
+ %beg = call token @llvm.coro.begin(i8* %phi, i8* %elide, i32 0, i8* null,
+ i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
+ %hdl = call i8* @llvm.coro.frame(token %beg)
+ invoke void @may_throw()
+ to label %ret unwind label %ehcleanup
+ret:
+ ret i8* %hdl
+
+ehcleanup:
+ %tok = cleanuppad within none []
+ %mem = call i8* @llvm.coro.free(i8* %hdl)
+ %need.dyn.free = icmp ne i8* %mem, null
+ br i1 %need.dyn.free, label %dyn.free, label %if.end
+dyn.free:
+ call void @CustomFree(i8* %mem)
+ br label %if.end
+if.end:
+ cleanupret from %tok unwind to caller
+}
+
+; CHECK-LABEL: @callResume(
+define void @callResume() {
+entry:
+; CHECK: alloca %f.frame
+; CHECK-NOT: coro.begin
+; CHECK-NOT: CustomAlloc
+; CHECK: call void @may_throw()
+ %hdl = call i8* @f()
+
+; Need to remove 'tail' from the first call to @bar
+; CHECK-NOT: tail call void @bar(
+; CHECK: call void @bar(
+ tail call void @bar(i8* %hdl)
+; CHECK: tail call void @bar(
+ tail call void @bar(i8* null)
+
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8* %vFrame)
+ %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+ %1 = bitcast i8* %0 to void (i8*)*
+ call fastcc void %1(i8* %hdl)
+
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8* %vFrame)
+ %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+ %3 = bitcast i8* %2 to void (i8*)*
+ call fastcc void %3(i8* %hdl)
+
+; CHECK-NEXT: ret void
+ ret void
+}
+
+; a coroutine start function (cannot elide heap alloc, due to second argument to
+; coro.begin not pointint to coro.alloc)
+define i8* @f_no_elision() personality i8* null {
+entry:
+ %alloc = call i8* @CustomAlloc(i32 4)
+ %beg = call token @llvm.coro.begin(i8* %alloc, i8* null, i32 0, i8* null,
+ i8* bitcast ([2 x void (%f.frame*)*]* @f.resumers to i8*))
+ %hdl = call i8* @llvm.coro.frame(token %beg)
+ ret i8* %hdl
+}
+
+; CHECK-LABEL: @callResume_no_elision(
+define void @callResume_no_elision() {
+entry:
+; CHECK: call i8* @CustomAlloc(
+ %hdl = call i8* @f_no_elision()
+
+; Tail call should remain tail calls
+; CHECK: tail call void @bar(
+ tail call void @bar(i8* %hdl)
+; CHECK: tail call void @bar(
+ tail call void @bar(i8* null)
+
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.resume to void (i8*)*)(i8*
+ %0 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)
+ %1 = bitcast i8* %0 to void (i8*)*
+ call fastcc void %1(i8* %hdl)
+
+; CHECK-NEXT: call fastcc void bitcast (void (%f.frame*)* @f.destroy to void (i8*)*)(i8*
+ %2 = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 1)
+ %3 = bitcast i8* %2 to void (i8*)*
+ call fastcc void %3(i8* %hdl)
+
+; CHECK-NEXT: ret void
+ ret void
+}
+
+
+declare i8* @llvm.coro.alloc()
+declare i8* @llvm.coro.free(i8*)
+declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*)
+declare i8* @llvm.coro.frame(token)
+declare i8* @llvm.coro.subfn.addr(i8*, i8)
diff --git a/llvm/test/Transforms/Coroutines/restart-trigger.ll b/llvm/test/Transforms/Coroutines/restart-trigger.ll
new file mode 100644
index 00000000000..7cc0bb98f76
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/restart-trigger.ll
@@ -0,0 +1,16 @@
+; Verifies that restart trigger forces IPO pipelines restart and the same
+; coroutine is looked at by CoroSplit pass twice.
+; REQUIRES: asserts
+; RUN: opt < %s -S -O0 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
+; RUN: opt < %s -S -O1 -enable-coroutines -debug-only=coro-split 2>&1 | FileCheck %s
+
+; CHECK: CoroSplit: Processing coroutine 'f' state: 0
+; CHECK-NEXT: CoroSplit: Processing coroutine 'f' state: 1
+
+declare token @llvm.coro.begin(i8*, i8*, i32, i8*, i8*)
+
+; a coroutine start function
+define void @f() {
+ call token @llvm.coro.begin(i8* null, i8* null, i32 0, i8* null, i8* null)
+ ret void
+}
OpenPOWER on IntegriCloud