summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorRenato Golin <renato.golin@linaro.org>2015-02-25 14:41:06 +0000
committerRenato Golin <renato.golin@linaro.org>2015-02-25 14:41:06 +0000
commitb9887ef32a5d06108dfabbbe181bd8e4ea7abbfe (patch)
treee470f2ae641ff070823cdb61580fd2a777e60b22 /llvm/lib
parenta9b01eb77641cce46dcc69bce27635a72187a89c (diff)
downloadbcm5719-llvm-b9887ef32a5d06108dfabbbe181bd8e4ea7abbfe.tar.gz
bcm5719-llvm-b9887ef32a5d06108dfabbbe181bd8e4ea7abbfe.zip
Improve handling of stack accesses in Thumb-1
Thumb-1 only allows SP-based LDR and STR to be word-sized, and SP-base LDR, STR, and ADD only allow offsets that are a multiple of 4. Make some changes to better make use of these instructions: * Use word loads for anyext byte and halfword loads from the stack. * Enforce 4-byte alignment on objects accessed in this way, to ensure that the offset is valid. * Do the same for objects whose frame index is used, in order to avoid having to use more than one ADD to generate the frame index. * Correct how many bits of offset we think AddrModeT1_s has. Patch by John Brawn. llvm-svn: 230496
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp7
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp15
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb.td11
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp36
4 files changed, 57 insertions, 12 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b3fd4033d21..7574727c242 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -546,12 +546,13 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// and pick a real one.
Offset += 128; // 128 bytes of spill slots
- // If there is a frame pointer, try using it.
+ // If there's a frame pointer and the addressing mode allows it, try using it.
// The FP is only available if there is no dynamic realignment. We
// don't know for sure yet whether we'll need that, so we guess based
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
- if (TFI->hasFP(MF) &&
+ if (TFI->hasFP(MF) &&
+ (MI->getDesc().TSFlags & ARMII::AddrModeMask) != ARMII::AddrModeT1_s &&
!((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, FPOffset))
return false;
@@ -668,7 +669,7 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
NumBits = 8;
break;
case ARMII::AddrModeT1_s:
- NumBits = 5;
+ NumBits = 8;
Scale = 4;
isSigned = false;
break;
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 77464bd47ea..6ebf640e0b7 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1197,6 +1197,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
SDValue &Base, SDValue &OffImm) {
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ // Only multiples of 4 are allowed for the offset, so the frame object
+ // alignment must be at least 4.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
OffImm = CurDAG->getTargetConstant(0, MVT::i32);
return true;
@@ -1214,6 +1219,11 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ // For LHS+RHS to result in an offset that's a multiple of 4 the object
+ // indexed by the LHS must be 4-byte aligned.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
}
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
@@ -2502,6 +2512,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
if (Subtarget->isThumb1Only()) {
+ // Set the alignment of the frame object to 4, to avoid having to generate
+ // more than one ADD
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (MFI->getObjectAlignment(FI) < 4)
+ MFI->setObjectAlignment(FI, 4);
return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
CurDAG->getTargetConstant(0, MVT::i32));
} else {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index cc953c637cb..3c62e0ec2a8 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1375,6 +1375,17 @@ def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
(tLDRBi t_addrmode_is1:$addr)>;
+// extload from the stack -> word load from the stack, as it avoids having to
+// materialize the base in a separate register. This only works when a word
+// load puts the byte/halfword value in the same place in the register that the
+// byte/halfword load would, i.e. when little-endian.
+def : T1Pat<(extloadi1 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+def : T1Pat<(extloadi8 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+def : T1Pat<(extloadi16 t_addrmode_sp:$addr), (tLDRspi t_addrmode_sp:$addr)>,
+ Requires<[IsThumb, IsThumb1Only, IsLE]>;
+
// extload -> zextload
def : T1Pat<(extloadi1 t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
def : T1Pat<(extloadi1 t_addrmode_is1:$addr), (tLDRBi t_addrmode_is1:$addr)>;
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index bd40658638a..a8d09818978 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -170,7 +170,8 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
return OffField;
// Thumb1 immediate offsets are scaled by 4
- if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
+ Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
return OffField * 4;
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
@@ -206,6 +207,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ib: return ARM::STMIB;
}
case ARM::tLDRi:
+ case ARM::tLDRspi:
// tLDMIA is writeback-only - unless the base register is in the input
// reglist.
++NumLDMGened;
@@ -214,6 +216,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::tLDMIA;
}
case ARM::tSTRi:
+ case ARM::tSTRspi:
// There is no non-writeback tSTMIA either.
++NumSTMGened;
switch (Mode) {
@@ -328,7 +331,7 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
} // end namespace llvm
static bool isT1i32Load(unsigned Opc) {
- return Opc == ARM::tLDRi;
+ return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}
static bool isT2i32Load(unsigned Opc) {
@@ -340,7 +343,7 @@ static bool isi32Load(unsigned Opc) {
}
static bool isT1i32Store(unsigned Opc) {
- return Opc == ARM::tSTRi;
+ return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}
static bool isT2i32Store(unsigned Opc) {
@@ -356,6 +359,8 @@ static unsigned getImmScale(unsigned Opc) {
default: llvm_unreachable("Unhandled opcode!");
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
return 1;
case ARM::tLDRHi:
case ARM::tSTRHi:
@@ -495,6 +500,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (isThumb1)
for (unsigned I = 0; I < NumRegs; ++I)
if (Base == Regs[I].first) {
+ assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
if (Opcode == ARM::tLDRi) {
Writeback = false;
break;
@@ -515,7 +521,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
} else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- } else if (Offset != 0) {
+ } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
// Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
@@ -545,6 +551,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
int BaseOpc =
isThumb2 ? ARM::t2ADDri :
+ (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
(isThumb1 && Offset < 8) ? ARM::tADDi3 :
isThumb1 ? ARM::tADDi8 : ARM::ADDri;
@@ -552,7 +559,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
Offset = - Offset;
BaseOpc =
isThumb2 ? ARM::t2SUBri :
- (isThumb1 && Offset < 8) ? ARM::tSUBi3 :
+ (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
}
@@ -566,7 +573,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// or
// MOV NewBase, Base
// ADDS NewBase, #imm8.
- if (Base != NewBase && Offset >= 8) {
+ if (Base != NewBase &&
+ (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
// Need to insert a MOV to the new base first.
if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
!STI->hasV6Ops()) {
@@ -584,9 +592,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
Base = NewBase;
BaseKill = false;
}
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
- .addImm(Pred).addReg(PredReg);
+ if (BaseOpc == ARM::tADDrSPi) {
+ assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4)
+ .addImm(Pred).addReg(PredReg);
+ } else
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
} else {
BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
.addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
@@ -967,6 +981,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -1402,6 +1418,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::STRi12:
case ARM::tLDRi:
case ARM::tSTRi:
+ case ARM::tLDRspi:
+ case ARM::tSTRspi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
OpenPOWER on IntegriCloud