author | Francis Visoiu Mistrih <francisvm@yahoo.com> | 2018-04-27 15:30:54 +0000 |
---|---|---|
committer | Francis Visoiu Mistrih <francisvm@yahoo.com> | 2018-04-27 15:30:54 +0000 |
commit | c855e92ca9cf817d0e816f29ca1bb2edd9380f2e (patch) | |
tree | 4383fd5edc4205b85274a011ddcc17807534eddc /llvm/lib/Target | |
parent | ffb8d8711ce6fda53ae087914f8a96a01e9b37be (diff) | |
[AArch64] Place the first ldp at the end when ReverseCSRRestoreSeq is true
Put the first ldp at the end, so that the load-store optimizer can run
and merge the ldp and the add into a post-index ldp.
Previously, this was not done when no frame was needed, which resulted in
code size regressions.
llvm-svn: 331044
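
For illustration only (not part of the commit): `MachineBasicBlock::splice` behaves like `std::list::splice` here, moving the instruction at `LastPopI` to just before `FirstSPPopI`. A minimal standalone C++ sketch of that reordering, using the epilogue sequence from the patch's own comments as plain strings:

```cpp
// Standalone sketch, not LLVM code: models MBB.splice(FirstSPPopI, &MBB, LastPopI)
// with std::list::splice. The instruction strings mirror the epilogue shown in
// the patch comments and are illustrative only.
#include <iostream>
#include <iterator>
#include <list>
#include <string>

int main() {
  std::list<std::string> Epilogue = {
      "ldp x26, x25, [sp]",       // first CSR pop (LastPopI)
      "ldp x24, x23, [sp, #16]",
      "ldp x22, x21, [sp, #32]",
      "ldp x20, x19, [sp, #48]",
      "add sp, sp, #64",          // SP adjustment (FirstSPPopI)
  };

  auto LastPopI = Epilogue.begin();             // the first ldp
  auto FirstSPPopI = std::prev(Epilogue.end()); // the add sp, sp, #64

  // Move the first ldp so it sits immediately before the add, exactly what the
  // splice in the patch does.
  Epilogue.splice(FirstSPPopI, Epilogue, LastPopI);

  for (const std::string &I : Epilogue)
    std::cout << I << '\n';
}
```

Running it prints the reordered epilogue; with the first pop now adjacent to the `add sp, sp, #64`, the AArch64 load-store optimizer can fold the pair into a single post-index `ldp x26, x25, [sp], #64`.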
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 75 |
1 file changed, 45 insertions, 30 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index a573e2cc73e..d233827078a 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -514,6 +514,38 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
 }
 
+static void adaptForLdStOpt(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator FirstSPPopI,
+                            MachineBasicBlock::iterator LastPopI) {
+  // Sometimes (when we restore in the same order as we save), we can end up
+  // with code like this:
+  //
+  // ldp x26, x25, [sp]
+  // ldp x24, x23, [sp, #16]
+  // ldp x22, x21, [sp, #32]
+  // ldp x20, x19, [sp, #48]
+  // add sp, sp, #64
+  //
+  // In this case, it is always better to put the first ldp at the end, so
+  // that the load-store optimizer can run and merge the ldp and the add into
+  // a post-index ldp.
+  // If we managed to grab the first pop instruction, move it to the end.
+  if (ReverseCSRRestoreSeq)
+    MBB.splice(FirstSPPopI, &MBB, LastPopI);
+  // We should end up with something like this now:
+  //
+  // ldp x24, x23, [sp, #16]
+  // ldp x22, x21, [sp, #32]
+  // ldp x20, x19, [sp, #48]
+  // ldp x26, x25, [sp]
+  // add sp, sp, #64
+  //
+  // and the load-store optimizer can merge the last two instructions into:
+  //
+  // ldp x26, x25, [sp], #64
+  //
+}
+
 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                         MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -930,12 +962,20 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
     int StackRestoreBytes = RedZone ? 0 : NumBytes;
     if (NoCalleeSaveRestore)
       StackRestoreBytes += AfterCSRPopSize;
-    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
-                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+
     // If we were able to combine the local stack pop with the argument pop,
     // then we're done.
-    if (NoCalleeSaveRestore || AfterCSRPopSize == 0)
+    bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
+
+    // If we're done after this, make sure to help the load store optimizer.
+    if (Done)
+      adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
+
+    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
+                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+    if (Done)
       return;
+
     NumBytes = 0;
   }
 
@@ -967,33 +1007,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
       FirstSPPopI = Prev;
     }
 
-    // Sometimes (when we restore in the same order as we save), we can end up
-    // with code like this:
-    //
-    // ldp x26, x25, [sp]
-    // ldp x24, x23, [sp, #16]
-    // ldp x22, x21, [sp, #32]
-    // ldp x20, x19, [sp, #48]
-    // add sp, sp, #64
-    //
-    // In this case, it is always better to put the first ldp at the end, so
-    // that the load-store optimizer can run and merge the ldp and the add into
-    // a post-index ldp.
-    // If we managed to grab the first pop instruction, move it to the end.
-    if (LastPopI != Begin)
-      MBB.splice(FirstSPPopI, &MBB, LastPopI);
-    // We should end up with something like this now:
-    //
-    // ldp x24, x23, [sp, #16]
-    // ldp x22, x21, [sp, #32]
-    // ldp x20, x19, [sp, #48]
-    // ldp x26, x25, [sp]
-    // add sp, sp, #64
-    //
-    // and the load-store optimizer can merge the last two instructions into:
-    //
-    // ldp x26, x25, [sp], #64
-    //
+    adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
+
     emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
                     AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
   }
```
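
A note on the structure of the change, as visible in the diff above: the early-exit path in `emitEpilogue` (taken when the callee-save restore is skipped or `AfterCSRPopSize` is zero) previously restored the stack pointer and returned without any reordering. It now computes `Done` up front, calls `adaptForLdStOpt` when `Done` before emitting the SP-restoring `emitFrameOffset`, and only then returns, which is what covers the no-frame case mentioned in the commit message. The other path goes through the same shared helper, where the splice is now guarded by `ReverseCSRRestoreSeq` rather than by the previous `LastPopI != Begin` check.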