diff options
author | Oliver Stannard <oliver.stannard@arm.com> | 2014-11-17 11:18:10 +0000 |
---|---|---|
committer | Oliver Stannard <oliver.stannard@arm.com> | 2014-11-17 11:18:10 +0000 |
commit | 970b0d576c35c296111ffd9be6ffd04c294760db (patch) | |
tree | 242293bd7d14f3ddb30481a61bc325bde333a682 /llvm/lib | |
parent | 236b0ca79055779c4d024d66dcd0cbc44265df7f (diff) | |
download | bcm5719-llvm-970b0d576c35c296111ffd9be6ffd04c294760db.tar.gz bcm5719-llvm-970b0d576c35c296111ffd9be6ffd04c294760db.zip |
[Thumb1] Re-write emitThumbRegPlusImmediate
This was motivated by a bug which caused code like this to be
miscompiled:
declare void @take_ptr(i8*)
define void @test() {
%addr1.32 = alloca i8
%addr2.32 = alloca i32, i32 1028
call void @take_ptr(i8* %addr1)
ret void
}
This was emitting the following assembly to get the value of %addr1:
add r0, sp, #1020
add r0, r0, #8
However, "add r0, r0, #8" is not a valid Thumb1 instruction, and this
could not be assembled. The generated object file contained this,
resulting in r0 holding SP+8 rather tha SP+1028:
add r0, sp, #1020
add r0, sp, #8
This function looked like it could have caused miscompilations for
other combinations of registers and offsets (though I don't think it is
currently called with these), and the heuristic it used did not match
the emitted code in all cases.
llvm-svn: 222125
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp | 270 |
1 files changed, 134 insertions, 136 deletions
diff --git a/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp b/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp index 9a01b8ad7cc..c10c80919d7 100644 --- a/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -66,6 +66,10 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, int Val, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { + assert((isARMLowRegister(DestReg) || + isVirtualRegister(DestReg)) && + "Thumb1 does not have ldr to high register"); + MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); @@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, NumBytes = -NumBytes; } unsigned LdReg = DestReg; - if (DestReg == ARM::SP) { + if (DestReg == ARM::SP) assert(BaseReg == ARM::SP && "Unexpected!"); + if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg)) LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); - } - if (NumBytes <= 255 && NumBytes >= 0) + if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) .addImm(NumBytes).setMIFlags(MIFlags); - else if (NumBytes < 0 && NumBytes >= -255) { + } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) .addImm(NumBytes).setMIFlags(MIFlags); AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) @@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, ARMCC::AL, 0, MIFlags); // Emit add / sub. - int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr + : ARM::tADDrr); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (Opc != ARM::tADDhirr) @@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, AddDefaultPred(MIB); } -/// calcNumMI - Returns the number of instructions required to materialize -/// the specific add / sub r, c instruction. -static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, - unsigned NumBits, unsigned Scale) { - unsigned NumMIs = 0; - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - - if (Opc == ARM::tADDrSPi) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - NumMIs++; - NumBits = 8; - Scale = 1; // Followed by a number of tADDi8. - Chunk = ((1 << NumBits) - 1) * Scale; - } - - NumMIs += Bytes / Chunk; - if ((Bytes % Chunk) != 0) - NumMIs++; - if (ExtraOpc) - NumMIs++; - return NumMIs; -} - /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. +/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or +/// SUBs first, and uses a constant pool value if the instruction sequence would +/// be too long. This is allowed to modify the condition flags. void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, @@ -172,131 +155,146 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; - bool isMul4 = (Bytes & 3) == 0; - bool isTwoAddr = false; - bool DstNotEqBase = false; - unsigned NumBits = 1; - unsigned Scale = 1; - int Opc = 0; + + int CopyOpc = 0; + unsigned CopyBits = 0; + unsigned CopyScale = 1; + bool CopyNeedsCC = false; int ExtraOpc = 0; - bool NeedCC = false; - - if (DestReg == BaseReg && BaseReg == ARM::SP) { - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - isTwoAddr = true; - } else if (!isSub && BaseReg == ARM::SP) { - // r1 = add sp, 403 - // => - // r1 = add sp, 100 * 4 - // r1 = add r1, 3 - if (!isMul4) { - Bytes &= ~3; - ExtraOpc = ARM::tADDi3; + unsigned ExtraBits = 0; + unsigned ExtraScale = 1; + bool ExtraNeedsCC = false; + + // Strategy: + // We need to select two types of instruction, maximizing the available + // immediate range of each. The instructions we use will depend on whether + // DestReg and BaseReg are low, high or the stack pointer. + // * CopyOpc - DestReg = BaseReg + imm + // This will be emitted once if DestReg != BaseReg, and never if + // DestReg == BaseReg. + // * ExtraOpc - DestReg = DestReg + imm + // This will be emitted as many times as necessary to add the + // full immediate. + // If the immediate ranges of these instructions are not large enough to cover + // NumBytes with a reasonable number of instructions, we fall back to using a + // value loaded from a constant pool. + if (DestReg == ARM::SP) { + if (BaseReg == ARM::SP) { + // sp -> sp + // Already in right reg, no copy needed + } else { + // low -> sp or high -> sp + CopyOpc = ARM::tMOVr; + CopyBits = 0; } - DstNotEqBase = true; - NumBits = 8; - Scale = 4; - Opc = ARM::tADDrSPi; - } else { - // sp = sub sp, c - // r1 = sub sp, c - // r8 = sub sp, c - if (DestReg != BaseReg) - DstNotEqBase = true; - if (DestReg == ARM::SP) { - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; + ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi; + ExtraBits = 7; + ExtraScale = 4; + } else if (isARMLowRegister(DestReg)) { + if (BaseReg == ARM::SP) { + // sp -> low + assert(!isSub && "Thumb1 does not have tSUBrSPi"); + CopyOpc = ARM::tADDrSPi; + CopyBits = 8; + CopyScale = 4; + } else if (DestReg == BaseReg) { + // low -> same low + // Already in right reg, no copy needed + } else if (isARMLowRegister(BaseReg)) { + // low -> different low + CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3; + CopyBits = 3; + CopyNeedsCC = true; + } else { + // high -> low + CopyOpc = ARM::tMOVr; + CopyBits = 0; + } + ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + ExtraBits = 8; + ExtraNeedsCC = true; + } else /* DestReg is high */ { + if (DestReg == BaseReg) { + // high -> same high + // Already in right reg, no copy needed } else { - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - NumBits = 8; - NeedCC = true; + // {low,high,sp} -> high + CopyOpc = ARM::tMOVr; + CopyBits = 0; } - isTwoAddr = true; + ExtraOpc = 0; } - unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + // We could handle an unaligned immediate with an unaligned copy instruction + // and an aligned extra instruction, but this case is not currently needed. + assert(((Bytes & 3) == 0 || ExtraScale == 1) && + "Unaligned offset, but all instructions require alignment"); + + unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale; + // If we would emit the copy with an immediate of 0, just use tMOVr. + if (CopyOpc && Bytes < CopyScale) { + CopyOpc = ARM::tMOVr; + CopyBits = 0; + CopyScale = 1; + CopyNeedsCC = false; + CopyRange = 0; + } + unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction + unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0; + unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange); + + // We could handle this case when the copy instruction does not require an + // aligned immediate, but we do not currently do this. + assert(RangeAfterCopy % ExtraScale == 0 && + "Extra instruction requires immediate to be aligned"); + + unsigned RequiredExtraInstrs; + if (ExtraRange) + RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange; + else if (RangeAfterCopy > 0) + // We need an extra instruction but none is available + RequiredExtraInstrs = 1000000; + else + RequiredExtraInstrs = 0; + unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs; unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; - if (NumMIs > Threshold) { - // This will expand into too many instructions. Load the immediate from a - // constpool entry. + + // Use a constant pool, if the sequence of ADDs/SUBs is too expensive. + if (RequiredInstrs > Threshold) { emitThumbRegPlusImmInReg(MBB, MBBI, dl, DestReg, BaseReg, NumBytes, true, TII, MRI, MIFlags); return; } - if (DstNotEqBase) { - if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) { - // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) - unsigned Chunk = (1 << 3) - 1; - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3); - const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg) - .setMIFlags(MIFlags)); - AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); - } else if (isARMLowRegister(DestReg) && BaseReg == ARM::SP && Bytes > 0) { - unsigned ThisVal = std::min(1020U, Bytes / 4 * 4); - Bytes -= ThisVal; - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal / 4)) - .setMIFlags(MIFlags); - } else { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill)) - .setMIFlags(MIFlags); + // Emit zero or one copy instructions + if (CopyOpc) { + unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale; + Bytes -= CopyImm * CopyScale; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg); + if (CopyNeedsCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg, RegState::Kill); + if (CopyOpc != ARM::tMOVr) { + MIB.addImm(CopyImm); } + AddDefaultPred(MIB.setMIFlags(MIFlags)); + BaseReg = DestReg; } - unsigned Chunk = ((1 << NumBits) - 1) * Scale; + // Emit zero or more in-place add/sub instructions while (Bytes) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - ThisVal /= Scale; - // Build the new tADD / tSUB. - if (isTwoAddr) { - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); - if (NeedCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(DestReg).addImm(ThisVal); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); - } else { - bool isKill = BaseReg != ARM::SP; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); - if (NeedCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); - - BaseReg = DestReg; - if (Opc == ARM::tADDrSPi) { - // r4 = add sp, imm - // r4 = add r4, imm - // ... - NumBits = 8; - Scale = 1; - Chunk = ((1 << NumBits) - 1) * Scale; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - NeedCC = isTwoAddr = true; - } - } - } + unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale; + Bytes -= ExtraImm * ExtraScale; - if (ExtraOpc) { - const MCInstrDesc &MCID = TII.get(ExtraOpc); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) - .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3) - .setMIFlags(MIFlags)); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg); + if (ExtraNeedsCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg).addImm(ExtraImm); + MIB = AddDefaultPred(MIB); + MIB.setMIFlags(MIFlags); } } |