diff options
4 files changed, 338 insertions, 5 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index c8d9d2a6d02..56f38aca500 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5169,32 +5169,51 @@ AArch64InstrInfo::getOutliningCandidateInfo( // We need to decide how to emit calls + frames. We can always emit the same // frame if we don't need to save to the stack. If we have to save to the // stack, then we need a different frame. - unsigned NumNoStackSave = 0; + unsigned NumBytesNoStackCalls = 0; + std::vector<outliner::Candidate> CandidatesWithoutStackFixups; for (outliner::Candidate &C : RepeatedSequenceLocs) { C.initLRU(TRI); // Is LR available? If so, we don't need a save. if (C.LRU.available(AArch64::LR)) { + NumBytesNoStackCalls += 4; C.setCallInfo(MachineOutlinerNoLRSave, 4); - ++NumNoStackSave; + CandidatesWithoutStackFixups.push_back(C); } // Is an unused register available? If so, we won't modify the stack, so // we can outline with the same frame type as those that don't save LR. else if (findRegisterToSaveLRTo(C)) { + NumBytesNoStackCalls += 12; C.setCallInfo(MachineOutlinerRegSave, 12); - ++NumNoStackSave; + CandidatesWithoutStackFixups.push_back(C); + } + + // Is SP used in the sequence at all? If not, we don't have to modify + // the stack, so we are guaranteed to get the same frame. + else if (C.UsedInSequence.available(AArch64::SP)) { + NumBytesNoStackCalls += 12; + C.setCallInfo(MachineOutlinerDefault, 12); + CandidatesWithoutStackFixups.push_back(C); + } + + // If we outline this, we need to modify the stack. Pretend we don't + // outline this by saving all of its bytes. + else { + NumBytesNoStackCalls += SequenceSize; } } // If there are no places where we have to save LR, then note that we don't // have to update the stack. Otherwise, give every candidate the default // call type. 
- if (NumNoStackSave == RepeatedSequenceLocs.size()) + if (NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) { + RepeatedSequenceLocs = CandidatesWithoutStackFixups; FrameID = MachineOutlinerNoLRSave; - else + } else { SetCandidateCallInfo(MachineOutlinerDefault, 12); + } } // Does every candidate's MBB contain a call? If so, then we might have a call diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-all-stack.mir b/llvm/test/CodeGen/AArch64/machine-outliner-all-stack.mir new file mode 100644 index 00000000000..83b170a3c98 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-all-stack.mir @@ -0,0 +1,112 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +# Show that, when instructions that use the stack are present, it's possible +# for us to outline everything as the default outlining type. +# It's possible for reg-save-possible to outline by storing LR to a register, +# but most candidates in this case require us to modify the stack. The outliner +# should see that it's more beneficial to fix up instructions and save LR to +# the stack in this case. + +--- | + define void @reg-save-possible() #0 { ret void } + define void @stack-save1() #0 { ret void } + define void @stack-save2() #0 { ret void } + define void @stack-save3() #0 { ret void } + attributes #0 = { minsize noinline noredzone "no-frame-pointer-elim"="true" } +... +--- + +name: reg-save-possible +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $lr = ORRXri $xzr, 1 + $x19 = ORRXri $xzr, 1 + $x20 = ORRXri $xzr, 1 + bb.1: + liveins: $lr + ; CHECK-LABEL: name: reg-save-possible + ; CHECK: $sp = STRXpre $lr, $sp, -16 + ; CHECK-NEXT: BL [[FN:@OUTLINED_FUNCTION_[0-9]+]] + ; CHECK-NEXT: $sp, $lr = LDRXpost $sp, 16 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + bb.2: + RET undef $lr + +... 
+--- + +name: stack-save1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; CHECK-LABEL: name: stack-save1 + ; CHECK: $sp = STRXpre $lr, $sp, -16 + ; CHECK-NEXT: BL [[FN]] + ; CHECK-NEXT: $sp, $lr = LDRXpost $sp, 16 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + bb.2: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + RET undef $lr + +... 
+--- + +name: stack-save2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; CHECK-LABEL: name: stack-save2 + ; CHECK: $sp = STRXpre $lr, $sp, -16 + ; CHECK-NEXT: BL [[FN]] + ; CHECK-NEXT: $sp, $lr = LDRXpost $sp, 16 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + bb.2: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + RET undef $lr + +... 
+--- + +name: stack-save3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; CHECK-LABEL: name: stack-save3 + ; CHECK: $sp = STRXpre $lr, $sp, -16 + ; CHECK-NEXT: BL [[FN]] + ; CHECK-NEXT: $sp, $lr = LDRXpost $sp, 16 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + $x20, $x19 = LDPXi $sp, 10 + bb.2: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + RET undef $lr diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-compatible-candidates.mir b/llvm/test/CodeGen/AArch64/machine-outliner-compatible-candidates.mir new file mode 100644 index 00000000000..b153b4c5de2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-compatible-candidates.mir @@ -0,0 +1,103 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +# Ensure that we can outline candidates with compatible call/frame classes. +# +# - Save/restores that don't impact the stack can be outlined together. +# - Save/restores that impact the stack can be outlined together with them, as +# long as the outlined sequence itself doesn't use the stack. + +--- | + define void @no-save1() #0 { ret void } + define void @no-save2() #0 { ret void } + define void @reg-save() #0 { ret void } + define void @stack-save() #0 { ret void } + attributes #0 = { minsize noinline noredzone "no-frame-pointer-elim"="true" } +... 
+--- + +name: no-save1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $lr = ORRXri $xzr, 1 + bb.1: + ; CHECK-LABEL: name: no-save1 + ; CHECK: BL [[FN:@OUTLINED_FUNCTION_[0-9]+]] + ; CHECK-NOT: STRXpre + ; CHECK-NOT: $lr = + ; CHECK-NOT: ORRXrs + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + bb.2: + RET undef $lr + +... +--- + +name: no-save2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $lr = ORRXri $xzr, 1 + bb.1: + ; CHECK-LABEL: name: no-save2 + ; CHECK: BL [[FN]] + ; CHECK-NOT: STRXpre + ; CHECK-NOT: $lr = + ; CHECK-NOT: ORRXrs + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + bb.2: + RET undef $lr +... +--- + +name: reg-save +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr + ; CHECK-LABEL: name: reg-save + ; CHECK: $[[REG:x[0-9]+]] = ORRXrs $xzr, $lr, 0 + ; CHECK-NEXT: BL [[FN]] + ; CHECK-NEXT: $lr = ORRXrs $xzr, $[[REG]], 0 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + bb.2: + liveins: $lr + RET undef $lr + +... 
+--- + +name: stack-save +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; CHECK-LABEL: name: stack-save + ; CHECK: $sp = STRXpre $lr, $sp, -16 + ; CHECK-NEXT: BL [[FN]] + ; CHECK-NEXT: $sp, $lr = LDRXpost $sp, 16 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + $w11 = ORRWri $wzr, 1 + bb.2: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + RET undef $lr diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-drop-stack.mir b/llvm/test/CodeGen/AArch64/machine-outliner-drop-stack.mir new file mode 100644 index 00000000000..eb17dab5c10 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-outliner-drop-stack.mir @@ -0,0 +1,99 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner \ +# RUN: -verify-machineinstrs %s -o - | FileCheck %s + +--- | + define void @no-save1() #0 { ret void } + define void @no-save2() #0 { ret void } + define void @reg-save() #0 { ret void } + define void @stack-save() #0 { ret void } + attributes #0 = { minsize noinline noredzone "no-frame-pointer-elim"="true" } +... 
+--- + +name: no-save1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $lr = ORRXri $xzr, 1 + bb.1: + ; CHECK-LABEL: name: no-save1 + ; CHECK: BL [[FN:@OUTLINED_FUNCTION_[0-9]+]] + ; CHECK-NOT: STRXpre + ; CHECK-NOT: $lr = + ; CHECK-NOT: ORRXrs + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + bb.2: + RET undef $lr + +... +--- + +name: no-save2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $lr = ORRXri $xzr, 1 + bb.1: + ; CHECK-LABEL: name: no-save2 + ; CHECK: BL [[FN]] + ; CHECK-NOT: STRXpre + ; CHECK-NOT: $lr = + ; CHECK-NOT: ORRXrs + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + bb.2: + RET undef $lr +... +--- + +name: reg-save +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr + ; CHECK-LABEL: name: reg-save + ; CHECK: $[[REG:x[0-9]+]] = ORRXrs $xzr, $lr, 0 + ; CHECK-NEXT: BL [[FN]] + ; CHECK-NEXT: $lr = ORRXrs $xzr, $[[REG]], 0 + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + bb.2: + liveins: $lr + RET undef $lr + +... 
+--- + +name: stack-save +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + ; CHECK-LABEL: name: stack-save + ; CHECK-NOT: BL + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + $x12 = ADDXri $sp, 48, 0; + bb.2: + liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x18, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp + RET undef $lr |