summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Kuperstein <michael.m.kuperstein@intel.com>2015-08-11 08:48:48 +0000
committerMichael Kuperstein <michael.m.kuperstein@intel.com>2015-08-11 08:48:48 +0000
commit7337ee23d8ee9864d6b5f4140aaa56cf68388361 (patch)
treec1cc309ab4a2552a07a71045e91ba535f9374ed0
parent82814f63c035de29b37485d14749f4ddfb7d10d5 (diff)
downloadbcm5719-llvm-7337ee23d8ee9864d6b5f4140aaa56cf68388361.tar.gz
bcm5719-llvm-7337ee23d8ee9864d6b5f4140aaa56cf68388361.zip
[X86] When optimizing for minsize, use POP for small post-call stack clean-up
When optimizing for size, replace "addl $4, %esp" and "addl $8, %esp" following a call by one or two pops, respectively. We don't try to do it in general, but only when the stack adjustment immediately follows a call - which is the most common case. That allows taking a short-cut when trying to find a free register to pop into, instead of a full-blown liveness check. If the adjustment immediately follows a call, then every register the call clobbers but doesn't define should be dead at that point, and can be used. Differential Revision: http://reviews.llvm.org/D11749 llvm-svn: 244578
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.cpp69
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.h5
-rw-r--r--llvm/test/CodeGen/X86/fold-push.ll4
-rw-r--r--llvm/test/CodeGen/X86/pop-stack-cleanup.ll61
4 files changed, 136 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index b8858362e74..6cb2680be21 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -1851,6 +1851,69 @@ void X86FrameLowering::adjustForHiPEPrologue(
#endif
}
+bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const {
+
+ if (Offset % SlotSize)
+ return false;
+
+ int NumPops = Offset / SlotSize;
+ // This is only worth it if we have at most 2 pops.
+ if (NumPops != 1 && NumPops != 2)
+ return false;
+
+ // Handle only the trivial case where the adjustment directly follows
+ // a call. This is the most common one, anyway.
+ if (MBBI == MBB.begin())
+ return false;
+ MachineBasicBlock::iterator Prev = std::prev(MBBI);
+ if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
+ return false;
+
+ unsigned Regs[2];
+ unsigned FoundRegs = 0;
+
+ auto RegMask = Prev->getOperand(1);
+
+ // Try to find up to NumPops free registers.
+ for (auto Candidate : X86::GR32_NOREX_NOSPRegClass) {
+
+ // Poor man's liveness:
+ // Since we're immediately after a call, any register that is clobbered
+ // by the call and not defined by it can be considered dead.
+ if (!RegMask.clobbersPhysReg(Candidate))
+ continue;
+
+ bool IsDef = false;
+ for (const MachineOperand &MO : Prev->implicit_operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) {
+ IsDef = true;
+ break;
+ }
+ }
+
+ if (IsDef)
+ continue;
+
+ Regs[FoundRegs++] = Candidate;
+ if (FoundRegs == (unsigned)NumPops)
+ break;
+ }
+
+ if (FoundRegs == 0)
+ return false;
+
+ // If we found only one free register, but need two, reuse the same one twice.
+ while (FoundRegs < (unsigned)NumPops)
+ Regs[FoundRegs++] = Regs[0];
+
+ for (int i = 0; i < NumPops; ++i)
+ BuildMI(MBB, MBBI, DL,
+ TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]);
+
+ return true;
+}
+
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -1882,8 +1945,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
if (Amount) {
// Add Amount to SP to destroy a frame, and subtract to setup.
int Offset = isDestroy ? Amount : -Amount;
- BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+
+ if (!(MF.getFunction()->optForMinSize() &&
+ adjustStackWithPops(MBB, I, DL, Offset)))
+ BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
}
+
return;
}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 495cfcd1c3f..b74fcf46169 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -146,6 +146,11 @@ private:
MachineBasicBlock::iterator MBBI, DebugLoc DL,
uint64_t MaxAlign) const;
+ /// Make small positive stack adjustments using POPs.
+ bool adjustStackWithPops(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ int Offset) const;
+
/// Adjusts the stack pointer using LEA, SUB, or ADD.
MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/llvm/test/CodeGen/X86/fold-push.ll b/llvm/test/CodeGen/X86/fold-push.ll
index f5f33321c13..cb4e435e35c 100644
--- a/llvm/test/CodeGen/X86/fold-push.ll
+++ b/llvm/test/CodeGen/X86/fold-push.ll
@@ -27,11 +27,11 @@ define void @test_min(i32 %a, i32 %b) minsize {
; CHECK: movl [[EAX:%e..]], (%esp)
; CHECK-NEXT: pushl [[EAX]]
; CHECK-NEXT: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl
; CHECK: nop
; CHECK: pushl (%esp)
; CHECK: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: popl
%c = add i32 %a, %b
call void @foo(i32 %c)
call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
diff --git a/llvm/test/CodeGen/X86/pop-stack-cleanup.ll b/llvm/test/CodeGen/X86/pop-stack-cleanup.ll
new file mode 100644
index 00000000000..c6fb2d8885b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pop-stack-cleanup.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
+
+declare void @param1(i32 %a)
+declare i32 @param2_ret(i32 %a, i32 %b)
+declare i64 @param2_ret64(i32 %a, i32 %b)
+declare void @param2(i32 %a, i32 %b)
+declare void @param3(i32 %a, i32 %b, i32 %c)
+
+define void @test() minsize {
+; CHECK-LABEL: test:
+; CHECK: calll _param1
+; CHECK-NEXT: popl %eax
+; CHECK: calll _param2
+; CHECK-NEXT: popl %eax
+; CHECK-NEXT: popl %ecx
+; CHECK: calll _param2_ret
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %edx
+; CHECK-NEXT: pushl %eax
+; CHECK: calll _param3
+; CHECK-NEXT: addl $12, %esp
+; CHECK: calll _param2_ret64
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %ecx
+ call void @param1(i32 1)
+ call void @param2(i32 1, i32 2)
+ %ret = call i32 @param2_ret(i32 1, i32 2)
+ call void @param3(i32 1, i32 2, i32 %ret)
+ %ret64 = call i64 @param2_ret64(i32 1, i32 2)
+ ret void
+}
+
+define void @negative(i32 %k) {
+; CHECK-LABEL: negative:
+; CHECK: calll _param1
+; CHECK-NEXT: addl $4, %esp
+; CHECK: calll _param2
+; CHECK-NEXT: addl $8, %esp
+; CHECK: calll _param3
+; CHECK-NEXT: movl %ebp, %esp
+ %v = alloca i32, i32 %k
+ call void @param1(i32 1)
+ call void @param2(i32 1, i32 2)
+ call void @param3(i32 1, i32 2, i32 3)
+ ret void
+}
+
+define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize {
+; CHECK-LABEL: spill:
+; CHECK-DAG: movl %ecx,
+; CHECK-DAG: movl %edx,
+; CHECK: calll _param2_ret
+; CHECK-NEXT: popl %ecx
+; CHECK-NEXT: popl %edx
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: calll _spill
+ %i = call i32 @param2_ret(i32 1, i32 2)
+ call void @spill(i32 %a, i32 %b, i32 %c)
+ ret void
+}
OpenPOWER on IntegriCloud