diff options
Diffstat (limited to 'llvm/lib/Target/ARM/ARMFrameLowering.cpp')
-rw-r--r-- | llvm/lib/Target/ARM/ARMFrameLowering.cpp | 103 |
1 files changed, 81 insertions, 22 deletions
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 831c2eb7984..45c2c30db73 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -211,6 +211,68 @@ struct StackAdjustingInsts { }; } +/// Emit an instruction sequence that will align the address in +/// register Reg by zero-ing out the lower bits. For versions of the +/// architecture that support Neon, this must be done in a single +/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a +/// single instruction. That function only gets called when optimizing +/// spilling of D registers on a core with the Neon instruction set +/// present. +static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, + const TargetInstrInfo &TII, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, const unsigned Reg, + const unsigned Alignment, + const bool MustBeSingleInstruction) { + const ARMSubtarget &AST = MF.getTarget().getSubtarget<ARMSubtarget>(); + const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops(); + const unsigned AlignMask = Alignment - 1; + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported"); + if (!AFI->isThumbFunction()) { + // if the BFC instruction is available, use that to zero the lower + // bits: + // bfc Reg, #0, log2(Alignment) + // otherwise use BIC, if the mask to zero the required number of bits + // can be encoded in the bic immediate field + // bic Reg, Reg, Alignment-1 + // otherwise, emit + // lsr Reg, Reg, log2(Alignment) + // lsl Reg, Reg, log2(Alignment) + if (CanUseBFC) { + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask)); + } else if (AlignMask <= 255) { + AddDefaultCC( + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) + .addReg(Reg, RegState::Kill) + .addImm(AlignMask))); + } else { + assert(!MustBeSingleInstruction && + "Shouldn't call emitAligningInstructions demanding a single " + "instruction to be emitted for large stack alignment for a target " + "without BFC."); + AddDefaultCC(AddDefaultPred( + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)))); + AddDefaultCC(AddDefaultPred( + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)))); + } + } else { + // Since this is only reached for Thumb-2 targets, the BFC instruction + // should always be available. + assert(CanUseBFC); + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask)); + } +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -568,28 +630,24 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // realigned. if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); - assert (!AFI->isThumb1OnlyFunction()); + assert(!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { - // Emit bic sp, sp, MaxAlign - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::BICri), ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addImm(MaxAlign-1))); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, + false); } else { - // We cannot use sp as source/dest register here, thus we're emitting the - // following sequence: + // We cannot use sp as source/dest register here, thus we're using r4 to + // perform the calculations. We're emitting the following sequence: // mov r4, sp - // bic r4, r4, MaxAlign + // -- use emitAligningInstructions to produce best sequence to zero + // -- out lower bits in r4 // mov sp, r4 // FIXME: It will be better just to find spare register here. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) - .addReg(ARM::SP, RegState::Kill)); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::t2BICri), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign-1))); + .addReg(ARM::SP, RegState::Kill)); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, + false); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) - .addReg(ARM::R4, RegState::Kill)); + .addReg(ARM::R4, RegState::Kill)); } AFI->setShouldRestoreSPFromFP(true); @@ -1084,15 +1142,16 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // The immediate is <= 64, so it doesn't need any special encoding. unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::SP) - .addImm(8 * NumAlignedDPRCS2Regs))); + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs))); - // bic r4, r4, #align-1 - Opc = isThumb ? ARM::t2BICri : ARM::BICri; unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign - 1))); + // We must set parameter MustBeSingleInstruction to true, since + // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform + // stack alignment. Luckily, this can always be done since all ARM + // architecture versions that support Neon also support the BFC + // instruction. + emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true); // mov sp, r4 // The stack pointer must be adjusted before spilling anything, otherwise |