summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Green <david.green@arm.com>2019-10-18 10:35:46 +0000
committerDavid Green <david.green@arm.com>2019-10-18 10:35:46 +0000
commit651f07908a149b8eb861a1b109f98eeb3d4f1517 (patch)
treef4668600e2f4b3d5ea2b51cb2cb3b13edafd6688
parentef04598e147396e7225964ae8438ecbf6554b095 (diff)
downloadbcm5719-llvm-651f07908a149b8eb861a1b109f98eeb3d4f1517.tar.gz
bcm5719-llvm-651f07908a149b8eb861a1b109f98eeb3d4f1517.zip
[AArch64] Don't combine callee-save and local stack adjustment when optimizing for size
For arm64, D18619 introduced the ability to combine bumping the stack pointer upfront in case it needs to be bumped for both the callee-save area as well as the local stack area. That diff already remarks that "This change can cause an increase in instructions", but argues that even when that happens, it should be still be a performance benefit because the number of micro-ops is reduced. We have observed that this code-size increase can be significant in practice. This diff disables combining stack bumping for methods that are marked as optimize-for-size. Example of a prologue with the behavior before this diff (combining stack bumping when possible): sub sp, sp, #0x40 stp d9, d8, [sp, #0x10] stp x20, x19, [sp, #0x20] stp x29, x30, [sp, #0x30] add x29, sp, #0x30 [... compute x8 somehow ...] stp x0, x8, [sp] And after this diff, if the method is marked as optimize-for-size: stp d9, d8, [sp, #-0x30]! stp x20, x19, [sp, #0x10] stp x29, x30, [sp, #0x20] add x29, sp, #0x20 [... compute x8 somehow ...] stp x0, x8, [sp, #-0x10]! Note that without combining the stack bump there are two auto-decrements, nicely folded into the stp instructions, whereas otherwise there is a single sub sp, ... instruction, but not folded. Patch by Nikolai Tillmann! Differential Revision: https://reviews.llvm.org/D68530 llvm-svn: 375217
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp3
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll25
2 files changed, 28 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index addf3055ec5..68e1e6a3022 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -447,6 +447,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ if (MF.getFunction().hasOptSize())
+ return false;
+
if (AFI->getLocalStackSize() == 0)
return false;
diff --git a/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll b/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll
new file mode 100644
index 00000000000..273fb31e16c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-never-combine-csr-local-stack-bump-for-size.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s
+
+; CHECK-LABEL: main:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK-NEXT: stp xzr, xzr, [sp, #-16]!
+; CHECK: adrp x0, l_.str@PAGE
+; CHECK: add x0, x0, l_.str@PAGEOFF
+; CHECK-NEXT: bl _puts
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldp x29, x30, [sp], #16
+; CHECK-NEXT: ret
+
+@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"
+
+define i32 @main() nounwind ssp optsize {
+entry:
+ %local1 = alloca i64, align 8
+ %local2 = alloca i64, align 8
+ store i64 0, i64* %local1
+ store i64 0, i64* %local2
+ %call = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str, i32 0, i32 0))
+ ret i32 %call
+}
+
+declare i32 @puts(i8*)
OpenPOWER on IntegriCloud