diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 7 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrThumb.td | 11 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 36 |
4 files changed, 57 insertions, 12 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b3fd4033d21..7574727c242 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -546,12 +546,13 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // and pick a real one. Offset += 128; // 128 bytes of spill slots - // If there is a frame pointer, try using it. + // If there's a frame pointer and the addressing mode allows it, try using it. // The FP is only available if there is no dynamic realignment. We // don't know for sure yet whether we'll need that, so we guess based // on whether there are any local variables that would trigger it. unsigned StackAlign = TFI->getStackAlignment(); - if (TFI->hasFP(MF) && + if (TFI->hasFP(MF) && + (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s && !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) { if (isFrameOffsetLegal(MI, FPOffset)) return false; @@ -668,7 +669,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, NumBits = 8; break; case ARMII::AddrModeT1_s: - NumBits = 5; + NumBits = 8; Scale = 4; isSigned = false; break; diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 77464bd47ea..6ebf640e0b7 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1197,6 +1197,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); + // Only multiples of 4 are allowed for the offset, so the frame object + // alignment must be at least 4. + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (MFI->getObjectAlignment(FI) < 4) + MFI->setObjectAlignment(FI, 4); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -1214,6 +1219,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + // For LHS+RHS to result in an offset that's a multiple of 4 the object + // indexed by the LHS must be 4-byte aligned. + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (MFI->getObjectAlignment(FI) < 4) + MFI->setObjectAlignment(FI, 4); Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); @@ -2502,6 +2512,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); if (Subtarget->isThumb1Only()) { + // Set the alignment of the frame object to 4, to avoid having to generate + // more than one ADD + MachineFrameInfo *MFI = MF->getFrameInfo(); + if (MFI->getObjectAlignment(FI) < 4) + MFI->setObjectAlignment(FI, 4); return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, CurDAG->getTargetConstant(0, MVT::i32)); } else { diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index cc953c637cb..3c62e0ec2a8 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -1375,6 +1375,17 @@ def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr), def : T1Pat<(zextloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; +// extload from the stack -> word load from the stack, as it avoids having to +// materialize the base in a separate register. This only works when a word +// load puts the byte/halfword value in the same place in the register that the +// byte/halfword load would, i.e. when little-endian. +def : T1Pat<(extloadi1 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>, + Requires<[IsThumb, IsThumb1Only, IsLE]>; +def : T1Pat<(extloadi8 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>, + Requires<[IsThumb, IsThumb1Only, IsLE]>; +def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>, + Requires<[IsThumb, IsThumb1Only, IsLE]>; + // extload -> zextload def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>; def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>; diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index bd40658638a..a8d09818978 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -170,7 +170,8 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return OffField; // Thumb1 immediate offsets are scaled by 4 - if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi || + Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) return OffField * 4; int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) @@ -206,6 +207,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ib: return ARM::STMIB; } case ARM::tLDRi: + case ARM::tLDRspi: // tLDMIA is writeback-only - unless the base register is in the input // reglist. ++NumLDMGened; @@ -214,6 +216,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::ia: return ARM::tLDMIA; } case ARM::tSTRi: + case ARM::tSTRspi: // There is no non-writeback tSTMIA either. ++NumSTMGened; switch (Mode) { @@ -328,7 +331,7 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { } // end namespace llvm static bool isT1i32Load(unsigned Opc) { - return Opc == ARM::tLDRi; + return Opc == ARM::tLDRi || Opc == ARM::tLDRspi; } static bool isT2i32Load(unsigned Opc) { @@ -340,7 +343,7 @@ static bool isi32Load(unsigned Opc) { } static bool isT1i32Store(unsigned Opc) { - return Opc == ARM::tSTRi; + return Opc == ARM::tSTRi || Opc == ARM::tSTRspi; } static bool isT2i32Store(unsigned Opc) { @@ -356,6 +359,8 @@ static unsigned getImmScale(unsigned Opc) { default: llvm_unreachable("Unhandled opcode!"); case ARM::tLDRi: case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: return 1; case ARM::tLDRHi: case ARM::tSTRHi: @@ -495,6 +500,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (isThumb1) for (unsigned I = 0; I < NumRegs; ++I) if (Base == Regs[I].first) { + assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); if (Opcode == ARM::tLDRi) { Writeback = false; break; @@ -515,7 +521,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - } else if (Offset != 0) { + } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { // Check if this is a supported opcode before inserting instructions to // calculate a new base register. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; @@ -545,6 +551,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int BaseOpc = isThumb2 ? ARM::t2ADDri : + (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : (isThumb1 && Offset < 8) ? ARM::tADDi3 : isThumb1 ? ARM::tADDi8 : ARM::ADDri; @@ -552,7 +559,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, Offset = - Offset; BaseOpc = isThumb2 ? ARM::t2SUBri : - (isThumb1 && Offset < 8) ? ARM::tSUBi3 : + (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; } @@ -566,7 +573,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // or // MOV NewBase, Base // ADDS NewBase, #imm8. - if (Base != NewBase && Offset >= 8) { + if (Base != NewBase && + (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) { // Need to insert a MOV to the new base first. if (isARMLowRegister(NewBase) && isARMLowRegister(Base) && !STI->hasV6Ops()) { @@ -584,9 +592,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, Base = NewBase; BaseKill = false; } - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) - .addImm(Pred).addReg(PredReg); + if (BaseOpc == ARM::tADDrSPi) { + assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) + .addImm(Pred).addReg(PredReg); + } else + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg); } else { BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) @@ -967,6 +981,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STRi12: case ARM::tLDRi: case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1402,6 +1418,8 @@ static bool isMemoryOp(const MachineInstr *MI) { case ARM::STRi12: case ARM::tLDRi: case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: |