summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/IR/Statepoint.h11
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp40
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp26
-rw-r--r--llvm/test/CodeGen/X86/statepoint-live-in.ll131
-rw-r--r--llvm/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll23
6 files changed, 233 insertions, 9 deletions
diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h
index 5cd7fe1b576..783a5cc3c12 100644
--- a/llvm/include/llvm/IR/Statepoint.h
+++ b/llvm/include/llvm/IR/Statepoint.h
@@ -34,8 +34,15 @@ enum class StatepointFlags {
None = 0,
GCTransition = 1, ///< Indicates that this statepoint is a transition from
///< GC-aware code to code that is not GC-aware.
-
- MaskAll = GCTransition ///< A bitmask that includes all valid flags.
+ /// Mark the deopt arguments associated with the statepoint as only being
+ /// "live-in". By default, deopt arguments are "live-through". "live-through"
+ /// requires that they the value be live on entry, on exit, and at any point
+ /// during the call. "live-in" only requires the value be available at the
+ /// start of the call. In particular, "live-in" values can be placed in
+ /// unused argument registers or other non-callee saved registers.
+ DeoptLiveIn = 2,
+
+ MaskAll = 3 ///< A bitmask that includes all valid flags.
};
class GCRelocateInst;
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index e996c89e681..a8d6847e93a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -370,7 +370,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
/// Lower a single value incoming to a statepoint node. This value can be
/// either a deopt value or a gc value, the handling is the same. We special
/// case constants and allocas, then fall back to spilling if required.
-static void lowerIncomingStatepointValue(SDValue Incoming,
+static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
SDValue Chain = Builder.getRoot();
@@ -389,6 +389,14 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
// relocate the address of the alloca itself?)
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
Incoming.getValueType()));
+ } else if (LiveInOnly) {
+ // If this value is live in (not live-on-return, or live-through), we can
+ // treat it the same way patchpoint treats it's "live in" values. We'll
+ // end up folding some of these into stack references, but they'll be
+ // handled by the register allocator. Note that we do not have the notion
+ // of a late use so these values might be placed in registers which are
+ // clobbered by the call. This is fine for live-in.
+ Ops.push_back(Incoming);
} else {
// Otherwise, locate a spill slot and explicitly spill it so it
// can be found by the runtime later. We currently do not support
@@ -439,19 +447,38 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
"non gc managed derived pointer found in statepoint");
}
}
+ assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
} else {
assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!");
assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!");
}
#endif
+ // Figure out what lowering strategy we're going to use for each part
+ // Note: Is is conservatively correct to lower both "live-in" and "live-out"
+ // as "live-through". A "live-through" variable is one which is "live-in",
+ // "live-out", and live throughout the lifetime of the call (i.e. we can find
+ // it from any PC within the transitive callee of the statepoint). In
+ // particular, if the callee spills callee preserved registers we may not
+ // be able to find a value placed in that register during the call. This is
+ // fine for live-out, but not for live-through. If we were willing to make
+ // assumptions about the code generator producing the callee, we could
+ // potentially allow live-through values in callee saved registers.
+ const bool LiveInDeopt =
+ SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
+
+ auto isGCValue =[&](const Value *V) {
+ return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
+ };
+
// Before we actually start lowering (and allocating spill slots for values),
// reserve any stack slots which we judge to be profitable to reuse for a
// particular value. This is purely an optimization over the code below and
// doesn't change semantics at all. It is important for performance that we
// reserve slots for both deopt and gc values before lowering either.
for (const Value *V : SI.DeoptState) {
- reservePreviousStackSlotForValue(V, Builder);
+ if (!LiveInDeopt || isGCValue(V))
+ reservePreviousStackSlotForValue(V, Builder);
}
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
reservePreviousStackSlotForValue(SI.Bases[i], Builder);
@@ -468,7 +495,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// what type of values are contained within.
for (const Value *V : SI.DeoptState) {
SDValue Incoming = Builder.getValue(V);
- lowerIncomingStatepointValue(Incoming, Ops, Builder);
+ const bool LiveInValue = LiveInDeopt && !isGCValue(V);
+ lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
}
// Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -478,10 +506,12 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// (base[0], ptr[0], base[1], ptr[1], ...)
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
const Value *Base = SI.Bases[i];
- lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
+ Ops, Builder);
const Value *Ptr = SI.Ptrs[i];
- lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
+ Ops, Builder);
}
// If there are any explicit spill slots passed to the statepoint, record
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index a5d480bc4cd..1652e48a328 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -448,6 +448,11 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
StartIdx = PatchPointOpers(&MI).getVarIdx();
break;
}
+ case TargetOpcode::STATEPOINT: {
+ // For statepoints, fold deopt and gc arguments, but not call arguments.
+ StartIdx = StatepointOpers(&MI).getVarIdx();
+ break;
+ }
default:
llvm_unreachable("unexpected stackmap opcode");
}
@@ -513,7 +518,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
MachineInstr *NewMI = nullptr;
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
if (NewMI)
@@ -794,7 +800,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
int FrameIndex = 0;
if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT) &&
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) &&
isLoadFromStackSlot(LoadMI, FrameIndex)) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 9bcf8944660..b07d9c90e37 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1273,6 +1273,24 @@ public:
};
}
+static StringRef getDeoptLowering(CallSite CS) {
+ const char *DeoptLowering = "deopt-lowering";
+ if (CS.hasFnAttr(DeoptLowering)) {
+ // FIXME: CallSite has a *really* confusing interface around attributes
+ // with values.
+ const AttributeSet &CSAS = CS.getAttributes();
+ if (CSAS.hasAttribute(AttributeSet::FunctionIndex,
+ DeoptLowering))
+ return CSAS.getAttribute(AttributeSet::FunctionIndex,
+ DeoptLowering).getValueAsString();
+ Function *F = CS.getCalledFunction();
+ assert(F && F->hasFnAttribute(DeoptLowering));
+ return F->getFnAttribute(DeoptLowering).getValueAsString();
+ }
+ return "live-through";
+}
+
+
static void
makeStatepointExplicitImpl(const CallSite CS, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
@@ -1314,6 +1332,14 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
if (SD.StatepointID)
StatepointID = *SD.StatepointID;
+ // Pass through the requested lowering if any. The default is live-through.
+ StringRef DeoptLowering = getDeoptLowering(CS);
+ if (DeoptLowering.equals("live-in"))
+ Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
+ else {
+ assert(DeoptLowering.equals("live-through") && "Unsupported value!");
+ }
+
Value *CallTarget = CS.getCalledValue();
if (Function *F = dyn_cast<Function>(CallTarget)) {
if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) {
diff --git a/llvm/test/CodeGen/X86/statepoint-live-in.ll b/llvm/test/CodeGen/X86/statepoint-live-in.ll
new file mode 100644
index 00000000000..d69abd87424
--- /dev/null
+++ b/llvm/test/CodeGen/X86/statepoint-live-in.ll
@@ -0,0 +1,131 @@
+; RUN: llc -O3 < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare void @bar() #0
+declare void @baz()
+
+define void @test1(i32 %a) gc "statepoint-example" {
+entry:
+; We expect the argument to be passed in an extra register to bar
+; CHECK-LABEL: test1
+; CHECK: pushq %rax
+; CHECK-NEXT: Ltmp0:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq _bar
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a)
+ ret void
+}
+
+define void @test2(i32 %a, i32 %b) gc "statepoint-example" {
+entry:
+; Because the first call clobbers esi, we have to move the values into
+; new registers. Note that they stay in the registers for both calls.
+; CHECK-LABEL: @test2
+; CHECK: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: callq _bar
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 2, i32 %a, i32 %b)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 2, i32 %b, i32 %a)
+ ret void
+}
+
+define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) gc "statepoint-example" {
+entry:
+; TODO: We should have folded the reload into the statepoint.
+; CHECK-LABEL: @test3
+; CHECK: movl 32(%rsp), %r10d
+; CHECK-NEXT: movl 24(%rsp), %r11d
+; CHECK-NEXT: movl 16(%rsp), %eax
+; CHECK-NEXT: callq _bar
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 9, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i)
+ ret void
+}
+
+; This case just confirms that we don't crash when given more live values
+; than registers. This is a case where we *have* to use a stack slot.
+define void @test4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+entry:
+; TODO: We should have folded the reload into the statepoint.
+; CHECK-LABEL: test4
+; CHECK: pushq %r15
+; CHECK: pushq %r14
+; CHECK: pushq %r13
+; CHECK: pushq %r12
+; CHECK: pushq %rbx
+; CHECK: pushq %rax
+; CHECK: movl 128(%rsp), %r13d
+; CHECK-NEXT: movl 120(%rsp), %r12d
+; CHECK-NEXT: movl 112(%rsp), %r15d
+; CHECK-NEXT: movl 104(%rsp), %r14d
+; CHECK-NEXT: movl 96(%rsp), %ebp
+; CHECK-NEXT: movl 88(%rsp), %ebx
+; CHECK-NEXT: movl 80(%rsp), %r11d
+; CHECK-NEXT: movl 72(%rsp), %r10d
+; CHECK-NEXT: movl 64(%rsp), %eax
+; CHECK-NEXT: callq _bar
+ %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
+ ret void
+}
+
+; A live-through gc-value must be spilled even if it is also a live-in deopt
+; value. For live-in, we could technically report the register copy, but from
+; a code quality perspective it's better to reuse the required stack slot so
+; as to put less stress on the register allocator for no benefit.
+define i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" {
+entry:
+; CHECK-LABEL: test5
+; CHECK: movq %rsi, (%rsp)
+; CHECK-NEXT: callq _bar
+ %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p)
+ %p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 9, i32 9)
+ ret i32 addrspace(1)* %p2
+}
+
+; Show the interaction of live-through spilling followed by live-in.
+define void @test6(i32 %a) gc "statepoint-example" {
+entry:
+; TODO: We could have reused the previous spill slot at zero additional cost.
+; CHECK-LABEL: test6
+; CHECK: movl %edi, %ebx
+; CHECK: movl %ebx, 12(%rsp)
+; CHECK-NEXT: callq _baz
+; CHECK-NEXT: Ltmp30:
+; CHECK-NEXT: callq _bar
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 %a)
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a)
+ ret void
+}
+
+
+; CHECK: Ltmp1-_test1
+; CHECK: .byte 1
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .short 5
+; CHECK-NEXT: .long 0
+
+; CHECK: Ltmp7-_test2
+; CHECK: .byte 1
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .long 0
+; CHECK: .byte 1
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .short 3
+; CHECK-NEXT: .long 0
+; CHECK: Ltmp8-_test2
+; CHECK: .byte 1
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .short 3
+; CHECK-NEXT: .long 0
+; CHECK: .byte 1
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .long 0
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
+
+
+attributes #0 = { "deopt-lowering"="live-in" }
+attributes #1 = { "deopt-lowering"="live-through" }
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll b/llvm/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll
new file mode 100644
index 00000000000..22ca5cad1da
--- /dev/null
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/deopt-lowering-attrs.ll
@@ -0,0 +1,23 @@
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
+; Check that the "deopt-lowering" function attribute gets transcoded into
+; flags on the resulting statepoint
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare void @foo()
+declare void @bar() "deopt-lowering"="live-in"
+declare void @baz() "deopt-lowering"="live-through"
+
+define void @test1() gc "statepoint-example" {
+; CHECK-LABEL: @test1(
+; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 1, i32 57)
+; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 42)
+; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 13)
+
+entry:
+ call void @foo() [ "deopt"(i32 57) ]
+ call void @bar() [ "deopt"(i32 42) ]
+ call void @baz() [ "deopt"(i32 13) ]
+ ret void
+}
OpenPOWER on IntegriCloud