summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorFrancis Visoiu Mistrih <francisvm@yahoo.com>2018-04-27 15:30:54 +0000
committerFrancis Visoiu Mistrih <francisvm@yahoo.com>2018-04-27 15:30:54 +0000
commitc855e92ca9cf817d0e816f29ca1bb2edd9380f2e (patch)
tree4383fd5edc4205b85274a011ddcc17807534eddc /llvm/lib/Target
parentffb8d8711ce6fda53ae087914f8a96a01e9b37be (diff)
downloadbcm5719-llvm-c855e92ca9cf817d0e816f29ca1bb2edd9380f2e.tar.gz
bcm5719-llvm-c855e92ca9cf817d0e816f29ca1bb2edd9380f2e.zip
[AArch64] Place the first ldp at the end when ReverseCSRRestoreSeq is true
Put the first ldp at the end, so that the load-store optimizer can run and merge the ldp and the add into a post-index ldp. Previously, this didn't work when no frame was needed, which resulted in code size regressions. llvm-svn: 331044
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp75
1 file changed, 45 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a573e2cc73e..d233827078a 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -514,6 +514,38 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}
+static void adaptForLdStOpt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator FirstSPPopI,
+ MachineBasicBlock::iterator LastPopI) {
+ // Sometimes (when we restore in the same order as we save), we can end up
+ // with code like this:
+ //
+ // ldp x26, x25, [sp]
+ // ldp x24, x23, [sp, #16]
+ // ldp x22, x21, [sp, #32]
+ // ldp x20, x19, [sp, #48]
+ // add sp, sp, #64
+ //
+ // In this case, it is always better to put the first ldp at the end, so
+ // that the load-store optimizer can run and merge the ldp and the add into
+ // a post-index ldp.
+ // If we managed to grab the first pop instruction, move it to the end.
+ if (ReverseCSRRestoreSeq)
+ MBB.splice(FirstSPPopI, &MBB, LastPopI);
+ // We should end up with something like this now:
+ //
+ // ldp x24, x23, [sp, #16]
+ // ldp x22, x21, [sp, #32]
+ // ldp x20, x19, [sp, #48]
+ // ldp x26, x25, [sp]
+ // add sp, sp, #64
+ //
+ // and the load-store optimizer can merge the last two instructions into:
+ //
+ // ldp x26, x25, [sp], #64
+ //
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -930,12 +962,20 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
int StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += AfterCSRPopSize;
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
- StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+
// If we were able to combine the local stack pop with the argument pop,
// then we're done.
- if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
+ bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
+
+ // If we're done after this, make sure to help the load store optimizer.
+ if (Done)
+ adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
+
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+ StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+ if (Done)
return;
+
NumBytes = 0;
}
@@ -967,33 +1007,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
FirstSPPopI = Prev;
}
- // Sometimes (when we restore in the same order as we save), we can end up
- // with code like this:
- //
- // ldp x26, x25, [sp]
- // ldp x24, x23, [sp, #16]
- // ldp x22, x21, [sp, #32]
- // ldp x20, x19, [sp, #48]
- // add sp, sp, #64
- //
- // In this case, it is always better to put the first ldp at the end, so
- // that the load-store optimizer can run and merge the ldp and the add into
- // a post-index ldp.
- // If we managed to grab the first pop instruction, move it to the end.
- if (LastPopI != Begin)
- MBB.splice(FirstSPPopI, &MBB, LastPopI);
- // We should end up with something like this now:
- //
- // ldp x24, x23, [sp, #16]
- // ldp x22, x21, [sp, #32]
- // ldp x20, x19, [sp, #48]
- // ldp x26, x25, [sp]
- // add sp, sp, #64
- //
- // and the load-store optimizer can merge the last two instructions into:
- //
- // ldp x26, x25, [sp], #64
- //
+ adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
+
emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
}
OpenPOWER on IntegriCloud