 llvm/lib/CodeGen/InlineSpiller.cpp          |  35 ++
 llvm/test/CodeGen/X86/statepoint-live-in.ll | 456 +++++++++++++++++++++++-
 2 files changed, 490 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 7f89adfab75..647168b1c33 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -75,6 +75,10 @@ STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
 static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
                                      cl::desc("Disable inline spill hoisting"));
+static cl::opt<bool>
+RestrictStatepointRemat("restrict-statepoint-remat",
+                        cl::init(false), cl::Hidden,
+                        cl::desc("Restrict remat for statepoint operands"));
 
 namespace {
 
@@ -214,6 +218,7 @@ private:
   void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
   void markValueUsed(LiveInterval*, VNInfo*);
+  bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI);
   bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
   void reMaterializeAll();
 
@@ -513,6 +518,28 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
   } while (!WorkList.empty());
 }
 
+bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
+                                                     MachineInstr &MI) {
+  if (!RestrictStatepointRemat)
+    return true;
+  // Here's a quick explanation of the problem we're trying to handle here:
+  // * There are some pseudo instructions with more vreg uses than there are
+  //   physical registers on the machine.
+  // * This is normally handled by spilling the vreg, and folding the reload
+  //   into the user instruction. (Thus decreasing the number of used vregs
+  //   until the remainder can be assigned to physregs.)
+  // * However, since we may try to spill vregs in any order, we can end up
+  //   trying to spill each operand to the instruction, and then rematting it
+  //   instead. When that happens, the new live intervals (for the remats) are
+  //   expected to be trivially assignable (i.e. RS_Done). However, since we
+  //   may have more remats than physregs, we're guaranteed to fail to assign
+  //   one.
+  // At the moment, we only handle this for STATEPOINTs since they're the only
+  // pseudo op where we've seen this. If we start seeing other instructions
+  // with the same problem, we need to revisit this.
+  return (MI.getOpcode() != TargetOpcode::STATEPOINT);
+}
+
 /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
 bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
   // Analyze instruction
@@ -568,6 +595,14 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
     return true;
   }
 
+  // If we can't guarantee that we'll be able to actually assign the new vreg,
+  // we can't remat.
+  if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) {
+    markValueUsed(&VirtReg, ParentVNI);
+    LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
+    return false;
+  }
+
   // Allocate a new register for the remat.
   unsigned NewVReg = Edit->createFrom(Original);
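Note that the new knob is an ordinary hidden cl::opt, off by default, so existing invocations are unaffected until it is passed explicitly; being cl::Hidden, it shows up under llc's -help-hidden rather than -help. A minimal sketch of enabling it by hand, mirroring the updated RUN line in the test below (the input file name here is hypothetical):

    llc -verify-machineinstrs -O3 -restrict-statepoint-remat statepoint-many-live-values.ll -o out.s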
diff --git a/llvm/test/CodeGen/X86/statepoint-live-in.ll b/llvm/test/CodeGen/X86/statepoint-live-in.ll
index 69affe2a9fc..242f03fc388 100644
--- a/llvm/test/CodeGen/X86/statepoint-live-in.ll
+++ b/llvm/test/CodeGen/X86/statepoint-live-in.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -O3 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -O3 -restrict-statepoint-remat < %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.11.0"
@@ -128,6 +128,460 @@ entry:
   ret void
 }
 
+; A variant of test7 where values are not directly foldable from stack slots.
+define void @test7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; The code for this is terrible, check simply for correctness for the moment
+; CHECK-LABEL: test7:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $88, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 144
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl %edi, %edi
+; CHECK-NEXT: movl %esi, %esi
+; CHECK-NEXT: movl %edx, %edx
+; CHECK-NEXT: movl %ecx, %ecx
+; CHECK-NEXT: movl %r8d, %r8d
+; CHECK-NEXT: movl %r9d, %r9d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r12d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r14d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r10d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: callq _bar ## 88-byte Folded Reload
+; CHECK-NEXT: Ltmp8:
+; CHECK-NEXT: addq $88, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+  %a64 = zext i32 %a to i64
+  %b64 = zext i32 %b to i64
+  %c64 = zext i32 %c to i64
+  %d64 = zext i32 %d to i64
+  %e64 = zext i32 %e to i64
+  %f64 = zext i32 %f to i64
+  %g64 = zext i32 %g to i64
+  %h64 = zext i32 %h to i64
+  %i64 = zext i32 %i to i64
+  %j64 = zext i32 %j to i64
+  %k64 = zext i32 %k to i64
+  %l64 = zext i32 %l to i64
+  %m64 = zext i32 %m to i64
+  %n64 = zext i32 %n to i64
+  %o64 = zext i32 %o to i64
+  %p64 = zext i32 %p to i64
+  %q64 = zext i32 %q to i64
+  %r64 = zext i32 %r to i64
+  %s64 = zext i32 %s to i64
+  %t64 = zext i32 %t to i64
+  %u64 = zext i32 %u to i64
+  %v64 = zext i32 %v to i64
+  %w64 = zext i32 %w to i64
+  %x64 = zext i32 %x to i64
+  %y64 = zext i32 %y to i64
+  %z64 = zext i32 %z to i64
+  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
+  ret void
+}
+
+; a variant of test7 with mixed types chosen to exercise register aliases
+define void @test8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; The code for this is terrible, check simply for correctness for the moment
+; CHECK-LABEL: test8:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %r10d
+; CHECK-NEXT: movl %r8d, %r9d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebp
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r13d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r12d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r15d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r14d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r11d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %r8d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: callq _bar ## 72-byte Folded Reload
+; CHECK-NEXT: Ltmp9:
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+entry:
+  %a8 = trunc i32 %a to i8
+  %b8 = trunc i32 %b to i8
+  %c8 = trunc i32 %c to i8
+  %d8 = trunc i32 %d to i8
+  %e16 = trunc i32 %e to i16
+  %f16 = trunc i32 %f to i16
+  %g16 = trunc i32 %g to i16
+  %h16 = trunc i32 %h to i16
+  %i64 = zext i32 %i to i64
+  %j64 = zext i32 %j to i64
+  %k64 = zext i32 %k to i64
+  %l64 = zext i32 %l to i64
+  %m64 = zext i32 %m to i64
+  %n64 = zext i32 %n to i64
+  %o64 = zext i32 %o to i64
+  %p64 = zext i32 %p to i64
+  %q64 = zext i32 %q to i64
+  %r64 = zext i32 %r to i64
+  %s64 = zext i32 %s to i64
+  %t64 = zext i32 %t to i64
+  %u64 = zext i32 %u to i64
+  %v64 = zext i32 %v to i64
+  %w64 = zext i32 %w to i64
+  %x64 = zext i32 %x to i64
+  %y64 = zext i32 %y to i64
+  %z64 = zext i32 %z to i64
+  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i8 %a8, i8 %b8, i8 %c8, i8 %d8, i16 %e16, i16 %f16, i16 %g16, i16 %h16, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
+  ret void
+}
+
+; Test perfect forwarding of argument registers and stack slots to the
+; deopt bundle uses
+define void @test9(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; CHECK-LABEL: test9:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp10:
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+
+entry:
+  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
+  ret void
+}
+
+; Test enough folding of argument slots when we have one call which clobbers
+; registers before a second which needs them - i.e. we must do something with
+; arguments originally passed in registers
+define void @test10(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; FIXME (minor): It would be better to just spill (and fold reload) for
+; argument registers than spill and fill all the CSRs.
+; CHECK-LABEL: test10:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %r9d, %r15d
+; CHECK-NEXT: movl %r8d, %r14d
+; CHECK-NEXT: movl %ecx, %r12d
+; CHECK-NEXT: movl %edx, %r13d
+; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp11:
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp12:
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+
+entry:
+  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
+  %statepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
+  ret void
+}
+
+; Check that we can remat some uses of a def despite not remating before the
+; statepoint user.
+define i64 @test11(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; FIXME: The codegen for this is correct, but horrible. Lots of room for
+; improvement if we so desire.
+; CHECK-LABEL: test11:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %r15
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: pushq %r13
+; CHECK-NEXT: .cfi_def_cfa_offset 40
+; CHECK-NEXT: pushq %r12
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 56
+; CHECK-NEXT: subq $168, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 224
+; CHECK-NEXT: .cfi_offset %rbx, -56
+; CHECK-NEXT: .cfi_offset %r12, -48
+; CHECK-NEXT: .cfi_offset %r13, -40
+; CHECK-NEXT: .cfi_offset %r14, -32
+; CHECK-NEXT: .cfi_offset %r15, -24
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movl %esi, %r15d
+; CHECK-NEXT: movl %edx, %r12d
+; CHECK-NEXT: movl %ecx, %r13d
+; CHECK-NEXT: movl %r8d, %ebp
+; CHECK-NEXT: movl %r9d, %r14d
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
+; CHECK-NEXT: callq _bar ## 160-byte Folded Reload
+; CHECK-NEXT: Ltmp13:
+; CHECK-NEXT: addq %r15, %rbx
+; CHECK-NEXT: addq %r12, %rbx
+; CHECK-NEXT: addq %r13, %rbx
+; CHECK-NEXT: addq %rbp, %rbx
+; CHECK-NEXT: addq %r14, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; CHECK-NEXT: addq %rax, %rbx
+; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: addq $168, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r12
+; CHECK-NEXT: popq %r13
+; CHECK-NEXT: popq %r14
+; CHECK-NEXT: popq %r15
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+
+entry:
+  %a64 = zext i32 %a to i64
+  %b64 = zext i32 %b to i64
+  %c64 = zext i32 %c to i64
+  %d64 = zext i32 %d to i64
+  %e64 = zext i32 %e to i64
+  %f64 = zext i32 %f to i64
+  %g64 = zext i32 %g to i64
+  %h64 = zext i32 %h to i64
+  %i64 = zext i32 %i to i64
+  %j64 = zext i32 %j to i64
+  %k64 = zext i32 %k to i64
+  %l64 = zext i32 %l to i64
+  %m64 = zext i32 %m to i64
+  %n64 = zext i32 %n to i64
+  %o64 = zext i32 %o to i64
+  %p64 = zext i32 %p to i64
+  %q64 = zext i32 %q to i64
+  %r64 = zext i32 %r to i64
+  %s64 = zext i32 %s to i64
+  %t64 = zext i32 %t to i64
+  %u64 = zext i32 %u to i64
+  %v64 = zext i32 %v to i64
+  %w64 = zext i32 %w to i64
+  %x64 = zext i32 %x to i64
+  %y64 = zext i32 %y to i64
+  %z64 = zext i32 %z to i64
+  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i64 0, i64 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
+  %addab = add i64 %a64, %b64
+  %addc = add i64 %addab, %c64
+  %addd = add i64 %addc, %d64
+  %adde = add i64 %addd, %e64
+  %addf = add i64 %adde, %f64
+  %addg = add i64 %addf, %g64
+  %addh = add i64 %addg, %h64
+  %addi = add i64 %addh, %i64
+  %addj = add i64 %addi, %j64
+  %addk = add i64 %addj, %k64
+  %addl = add i64 %addk, %l64
+  %addm = add i64 %addl, %m64
+  %addn = add i64 %addm, %n64
+  %addo = add i64 %addn, %o64
+  %addp = add i64 %addo, %p64
+  %addq = add i64 %addp, %q64
+  %addr = add i64 %addq, %r64
+  %adds = add i64 %addr, %s64
+  %addt = add i64 %adds, %t64
+  %addu = add i64 %addt, %u64
+  %addv = add i64 %addu, %v64
+  %addw = add i64 %addv, %w64
+  %addx = add i64 %addw, %x64
+  %addy = add i64 %addx, %y64
+  %addz = add i64 %addy, %z64
+  ret i64 %addz
+}
+
 ; CHECK: Ltmp0-_test1
 ; CHECK: .byte 1
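As the NOTE at the top of the test file records, the CHECK lines above were autogenerated by utils/update_llc_test_checks.py. If a later codegen change perturbs these exact spill/reload sequences, the expected workflow is to regenerate the assertions rather than hand-edit them; a typical invocation from an LLVM source tree might look like this (the build directory path is an assumption):

    llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc llvm/test/CodeGen/X86/statepoint-live-in.ll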