diff options
author | Diogo Sampaio <diogo.sampaio@arm.com> | 2020-01-08 10:26:28 +0000 |
---|---|---|
committer | Diogo Sampaio <diogo.sampaio@arm.com> | 2020-01-10 11:25:44 +0000 |
commit | 8c12769f3046029e2a9b4e48e1645b1a77d28650 (patch) | |
tree | 358071d0bcd70bce130a2335b343497ef185e996 /llvm/lib/Target | |
parent | 1ccee0e86386762bd742fd067391b6c4be089806 (diff) | |
download | bcm5719-llvm-8c12769f3046029e2a9b4e48e1645b1a77d28650.tar.gz bcm5719-llvm-8c12769f3046029e2a9b4e48e1645b1a77d28650.zip |
[ARM][Thumb2] Fix ADD/SUB invalid writes to SP
Summary:
This patch fixes pr23772 [ARM] r226200 can emit illegal thumb2 instruction: "sub sp, r12, #80".
The violation was that SUB and ADD (reg, immediate) instructions can only write to SP if the source register is also SP. So the above instructions was unpredictable.
To enforce that the instruction t2(ADD|SUB)ri does not write to SP we now enforce the destination register to be rGPR (That exclude PC and SP).
Different than the ARM specification, that defines one instruction that can read from SP, and one that can't, here we inserted one that can't write to SP, and other that can only write to SP as to reuse most of the hard-coded size optimizations.
When performing this change, it uncovered that emitting Thumb2 Reg plus Immediate could not emit all variants of ADD SP, SP #imm instructions before so it was refactored to be able to. (see test/CodeGen/Thumb2/mve-stacksplot.mir where we use a subw sp, sp, Imm12 variant )
It also uncovered a disassembly issue of adr.w instructions, that were only written as SUBW instructions (see llvm/test/MC/Disassembler/ARM/thumb2.txt).
Reviewers: eli.friedman, dmgreen, carwil, olista01, efriedma
Reviewed By: efriedma
Subscribers: john.brawn, efriedma, ostannard, kristof.beyls, hiraditya, dmgreen, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70680
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrThumb2.td | 193 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 27 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 92 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 73 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 22 |
7 files changed, 336 insertions, 85 deletions
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index d18084901e4..1dbfbcaa0d8 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1171,11 +1171,15 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { case ARM::ADDri: case ARM::t2ADDri: case ARM::t2ADDri12: + case ARM::t2ADDspImm: + case ARM::t2ADDspImm12: Offset = -MI->getOperand(2).getImm(); break; case ARM::SUBri: case ARM::t2SUBri: case ARM::t2SUBri12: + case ARM::t2SUBspImm: + case ARM::t2SUBspImm12: Offset = MI->getOperand(2).getImm(); break; case ARM::tSUBspi: diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3dcca392259..6aa30330309 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -3257,22 +3257,26 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } break; case ARM::t2ADDrr: - case ARM::t2SUBrr: + case ARM::t2SUBrr: { if (UseOpc == ARM::t2SUBrr && Commute) return false; // ADD/SUB are special because they're essentially the same operation, so // we can handle a larger range of immediates. + const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP; + const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; + const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; if (ARM_AM::isT2SOImmTwoPartVal(ImmVal)) - NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2ADDri : ARM::t2SUBri; + NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB; else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) { ImmVal = -ImmVal; - NewUseOpc = UseOpc == ARM::t2ADDrr ? ARM::t2SUBri : ARM::t2ADDri; + NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD; } else return false; SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); break; + } case ARM::t2ORRrr: case ARM::t2EORrr: if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index d07d6ec119f..4193e8147f4 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -918,10 +918,26 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode, // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { + def spImm : T2sTwoRegImm< + (outs GPRsp:$Rd), (ins GPRsp:$Rn, t2_so_imm:$imm), IIC_iALUi, + opc, ".w\t$Rd, $Rn, $imm", + []>, + Sched<[WriteALU, ReadALU]> { + let Rn = 13; + let Rd = 13; + + let Inst{31-27} = 0b11110; + let Inst{25-24} = 0b01; + let Inst{23-21} = op23_21; + let Inst{15} = 0; + + let DecoderMethod = "DecodeT2AddSubSPImm"; + } + def ri : T2sTwoRegImm< - (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi, + (outs rGPR:$Rd), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iALUi, opc, ".w\t$Rd, $Rn, $imm", - [(set GPRnopc:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>, + [(set rGPR:$Rd, (opnode GPRnopc:$Rn, t2_so_imm:$imm))]>, Sched<[WriteALU, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -932,9 +948,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode, } // 12-bit imm def ri12 : T2I< - (outs GPRnopc:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, + (outs rGPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm), IIC_iALUi, !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", - [(set GPRnopc:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>, + [(set rGPR:$Rd, (opnode GPR:$Rn, imm0_4095:$imm))]>, Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -950,6 +966,26 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode, let Inst{11-8} = Rd; let Inst{7-0} = imm{7-0}; } + def spImm12 : T2I< + (outs GPRsp:$Rd), (ins GPRsp:$Rn, imm0_4095:$imm), IIC_iALUi, + !strconcat(opc, "w"), "\t$Rd, $Rn, $imm", + []>, + Sched<[WriteALU, ReadALU]> { + bits<4> Rd = 13; + bits<4> Rn = 13; + bits<12> imm; + let Inst{31-27} = 0b11110; + let Inst{26} = imm{11}; + let Inst{25-24} = 0b10; + let Inst{23-21} = op23_21; + let Inst{20} = 0; // The S bit. + let Inst{19-16} = Rn; + let Inst{15} = 0; + let Inst{14-12} = imm{10-8}; + let Inst{11-8} = Rd; + let Inst{7-0} = imm{7-0}; + let DecoderMethod = "DecodeT2AddSubSPImm"; + } // register def rr : T2sThreeReg<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iALUr, opc, ".w\t$Rd, $Rn, $Rm", @@ -2267,19 +2303,29 @@ def : t2InstSubst<"sbc${s}${p} $rd, $rn, $imm", (t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>; def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm", - (t2SUBri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; -def : t2InstSubst<"addw${p} $rd, $rn, $imm", - (t2SUBri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; + (t2SUBri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm", - (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; -def : t2InstSubst<"subw${p} $rd, $rn, $imm", - (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; + (t2ADDri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; def : t2InstSubst<"subw${p} $Rd, $Rn, $imm", - (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; + (t2ADDri12 rGPR:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; def : t2InstSubst<"sub${s}${p} $rd, $rn, $imm", - (t2ADDri GPRnopc:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; + (t2ADDri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; def : t2InstSubst<"sub${p} $rd, $rn, $imm", - (t2ADDri12 GPRnopc:$rd, GPR:$rn, t2_so_imm_neg:$imm, pred:$p)>; + (t2ADDri12 rGPR:$rd, GPR:$rn, imm0_4095_neg:$imm, pred:$p)>; + +// SP to SP alike +def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm", + (t2SUBspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"sub${s}${p}.w $rd, $rn, $imm", + (t2ADDspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"subw${p} $Rd, $Rn, $imm", + (t2ADDspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstSubst<"sub${s}${p} $rd, $rn, $imm", + (t2ADDspImm GPRsp:$rd, GPRsp:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>; +def : t2InstSubst<"sub${p} $rd, $rn, $imm", + (t2ADDspImm12 GPRsp:$rd, GPRsp:$rn, imm0_4095_neg:$imm, pred:$p)>; + + // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", sub>; @@ -2295,12 +2341,12 @@ defm t2RSBS : T2I_rbin_s_is <ARMsubc>; // The AddedComplexity preferences the first variant over the others since // it can be shrunk to a 16-bit wide encoding, while the others cannot. let AddedComplexity = 1 in -def : T2Pat<(add GPR:$src, imm1_255_neg:$imm), - (t2SUBri GPR:$src, imm1_255_neg:$imm)>; -def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm), - (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>; -def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm), - (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>; +def : T2Pat<(add rGPR:$src, imm1_255_neg:$imm), + (t2SUBri rGPR:$src, imm1_255_neg:$imm)>; +def : T2Pat<(add rGPR:$src, t2_so_imm_neg:$imm), + (t2SUBri rGPR:$src, t2_so_imm_neg:$imm)>; +def : T2Pat<(add rGPR:$src, imm0_4095_neg:$imm), + (t2SUBri12 rGPR:$src, imm0_4095_neg:$imm)>; def : T2Pat<(add GPR:$src, imm0_65535_neg:$imm), (t2SUBrr GPR:$src, (t2MOVi16 (imm_neg_XFORM imm:$imm)))>; @@ -2799,10 +2845,10 @@ def : T2Pat<(t2_so_imm_not:$src), // Thumb2SizeReduction's chances later on we select a t2ADD for an or where // possible. def : T2Pat<(or AddLikeOrOp:$Rn, t2_so_imm:$imm), - (t2ADDri $Rn, t2_so_imm:$imm)>; + (t2ADDri rGPR:$Rn, t2_so_imm:$imm)>; def : T2Pat<(or AddLikeOrOp:$Rn, imm0_4095:$Rm), - (t2ADDri12 $Rn, imm0_4095:$Rm)>; + (t2ADDri12 rGPR:$Rn, imm0_4095:$Rm)>; def : T2Pat<(or AddLikeOrOp:$Rn, non_imm32:$Rm), (t2ADDrr $Rn, $Rm)>; @@ -4666,10 +4712,10 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", // Aliases for ADD without the ".w" optional width specifier. def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", - (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, + (t2ADDri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${p} $Rd, $Rn, $imm", - (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; + (t2ADDri12 rGPR:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", (t2ADDrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm", @@ -4677,9 +4723,11 @@ def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm", pred:$p, cc_out:$s)>; // ... and with the destination and source register combined. def : t2InstAlias<"add${s}${p} $Rdn, $imm", - (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2ADDri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${p} $Rdn, $imm", - (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; + (t2ADDri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"addw${p} $Rdn, $imm", + (t2ADDri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"add${s}${p} $Rdn, $Rm", (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", @@ -4688,33 +4736,33 @@ def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", // add w/ negative immediates is just a sub. def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"add${p} $Rd, $Rn, $imm", - (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; def : t2InstSubst<"add${s}${p} $Rdn, $imm", - (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"add${p} $Rdn, $imm", - (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095_neg:$imm, pred:$p)>; def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, + (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"addw${p} $Rd, $Rn, $imm", - (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rd, rGPR:$Rn, imm0_4095_neg:$imm, pred:$p)>; def : t2InstSubst<"add${s}${p}.w $Rdn, $imm", - (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p, + (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"addw${p} $Rdn, $imm", - (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095_neg:$imm, pred:$p)>; // Aliases for SUB without the ".w" optional width specifier. def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${p} $Rd, $Rn, $imm", - (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm", (t2SUBrr GPRnopc:$Rd, GPRnopc:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm", @@ -4722,9 +4770,11 @@ def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm", pred:$p, cc_out:$s)>; // ... and with the destination and source register combined. def : t2InstAlias<"sub${s}${p} $Rdn, $imm", - (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + (t2SUBri rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${p} $Rdn, $imm", - (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; + (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"subw${p} $Rdn, $imm", + (t2SUBri12 rGPR:$Rdn, rGPR:$Rdn, imm0_4095:$imm, pred:$p)>; def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm", (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"sub${s}${p} $Rdn, $Rm", @@ -4733,6 +4783,65 @@ def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm", (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// SP to SP alike aliases +// Aliases for ADD without the ".w" optional width specifier. +def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", + (t2ADDspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm:$imm, pred:$p, + cc_out:$s)>; +def : t2InstAlias<"add${p} $Rd, $Rn, $imm", + (t2ADDspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095:$imm, pred:$p)>; +// ... and with the destination and source register combined. +def : t2InstAlias<"add${s}${p} $Rdn, $imm", + (t2ADDspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + +def : t2InstAlias<"add${s}${p}.w $Rdn, $imm", + (t2ADDspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; + +def : t2InstAlias<"add${p} $Rdn, $imm", + (t2ADDspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>; + +def : t2InstAlias<"addw${p} $Rdn, $imm", + (t2ADDspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>; + +// add w/ negative immediates is just a sub. +def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstSubst<"add${p} $Rd, $Rn, $imm", + (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstSubst<"add${s}${p} $Rdn, $imm", + (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstSubst<"add${p} $Rdn, $imm", + (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + +def : t2InstSubst<"add${s}${p}.w $Rd, $Rn, $imm", + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstSubst<"addw${p} $Rd, $Rn, $imm", + (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095_neg:$imm, pred:$p)>; +def : t2InstSubst<"add${s}${p}.w $Rdn, $imm", + (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm_neg:$imm, pred:$p, + cc_out:$s)>; +def : t2InstSubst<"addw${p} $Rdn, $imm", + (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095_neg:$imm, pred:$p)>; + + +// Aliases for SUB without the ".w" optional width specifier. +def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${p} $Rd, $Rn, $imm", + (t2SUBspImm12 GPRsp:$Rd, GPRsp:$Rn, imm0_4095:$imm, pred:$p)>; +// ... and with the destination and source register combined. +def : t2InstAlias<"sub${s}${p} $Rdn, $imm", + (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${s}${p}.w $Rdn, $imm", + (t2SUBspImm GPRsp:$Rdn, GPRsp:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${p} $Rdn, $imm", + (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"subw${p} $Rdn, $imm", + (t2SUBspImm12 GPRsp:$Rdn, GPRsp:$Rdn, imm0_4095:$imm, pred:$p)>; + // Alias for compares without the ".w" optional width specifier. def : t2InstAlias<"cmn${p} $Rn, $Rm", (t2CMNzrr GPRnopc:$Rn, rGPR:$Rm, pred:$p)>; @@ -4989,10 +5098,16 @@ def : t2InstSubst<"orr${s}${p} $Rdn, $imm", pred:$p, cc_out:$s)>; // Likewise, "add Rd, t2_so_imm_neg" -> sub def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, + (t2SUBri rGPR:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"add${s}${p} $Rd, $Rn, $imm", + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rn, t2_so_imm_neg:$imm, + pred:$p, cc_out:$s)>; +def : t2InstSubst<"add${s}${p} $Rd, $imm", + (t2SUBri rGPR:$Rd, rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; def : t2InstSubst<"add${s}${p} $Rd, $imm", - (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm, + (t2SUBspImm GPRsp:$Rd, GPRsp:$Rd, t2_so_imm_neg:$imm, pred:$p, cc_out:$s)>; // Same for CMP <--> CMN via t2_so_imm_neg def : t2InstSubst<"cmp${p} $Rd, $imm", diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 4a193fed04a..12dddd29ca8 100644 --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -696,18 +696,23 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( return nullptr; } - int BaseOpc = - isThumb2 ? ARM::t2ADDri : - (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi : - (isThumb1 && Offset < 8) ? ARM::tADDi3 : - isThumb1 ? ARM::tADDi8 : ARM::ADDri; + int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm + : ARM::t2ADDri) + : (isThumb1 && Base == ARM::SP) + ? ARM::tADDrSPi + : (isThumb1 && Offset < 8) + ? ARM::tADDi3 + : isThumb1 ? ARM::tADDi8 : ARM::ADDri; if (Offset < 0) { - Offset = - Offset; - BaseOpc = - isThumb2 ? ARM::t2SUBri : - (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 : - isThumb1 ? ARM::tSUBi8 : ARM::SUBri; + // FIXME: There are no Thumb1 load/store instructions with negative + // offsets. So the Base != ARM::SP might be unnecessary. + Offset = -Offset; + BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm + : ARM::t2SUBri) + : (isThumb1 && Offset < 8 && Base != ARM::SP) + ? ARM::tSUBi3 + : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; } if (!TL->isLegalAddImmediate(Offset)) @@ -1186,8 +1191,10 @@ static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; case ARM::t2SUBri: + case ARM::t2SUBspImm: case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; case ARM::t2ADDri: + case ARM::t2ADDspImm: case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 28b11ad3f1a..f5d1166d9df 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6554,7 +6554,8 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // Check against T3. If the second register is the PC, this is an // alternate form of ADR, which uses encoding T4, so check for that too. if (static_cast<ARMOperand &>(*Operands[4]).getReg() != ARM::PC && - static_cast<ARMOperand &>(*Operands[5]).isT2SOImm()) + (static_cast<ARMOperand &>(*Operands[5]).isT2SOImm() || + static_cast<ARMOperand &>(*Operands[5]).isT2SOImmNeg())) return false; // Otherwise, we use encoding T4, which does not have a cc_out @@ -6609,9 +6610,34 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && (static_cast<ARMOperand &>(*Operands[4]).isImm() || (Operands.size() == 6 && - static_cast<ARMOperand &>(*Operands[5]).isImm()))) - return true; - + static_cast<ARMOperand &>(*Operands[5]).isImm()))) { + // Thumb2 (add|sub){s}{p}.w GPRnopc, sp, #{T2SOImm} has cc_out + return (!(isThumbTwo() && + (static_cast<ARMOperand &>(*Operands[4]).isT2SOImm() || + static_cast<ARMOperand &>(*Operands[4]).isT2SOImmNeg()))); + } + // Fixme: Should join all the thumb+thumb2 (add|sub) in a single if case + // Thumb2 ADD r0, #4095 -> ADDW r0, r0, #4095 (T4) + // Thumb2 SUB r0, #4095 -> SUBW r0, r0, #4095 + if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") && + (Operands.size() == 5) && + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[3]).getReg() != ARM::SP && + static_cast<ARMOperand &>(*Operands[3]).getReg() != ARM::PC && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + static_cast<ARMOperand &>(*Operands[4]).isImm()) { + const ARMOperand &IMM = static_cast<ARMOperand &>(*Operands[4]); + if (IMM.isT2SOImm() || IMM.isT2SOImmNeg()) + return false; // add.w / sub.w + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(IMM.getImm())) { + const int64_t Value = CE->getValue(); + // Thumb1 imm8 sub / add + if ((Value < ((1 << 7) - 1) << 2) && inITBlock() && (!(Value & 3)) && + isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg())) + return false; + return true; // Thumb2 T4 addw / subw + } + } return false; } @@ -7707,12 +7733,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, } break; - case ARM::t2ADDri: - case ARM::t2ADDri12: case ARM::t2ADDrr: case ARM::t2ADDrs: - case ARM::t2SUBri: - case ARM::t2SUBri12: case ARM::t2SUBrr: case ARM::t2SUBrs: if (Inst.getOperand(0).getReg() == ARM::SP && @@ -9750,23 +9772,33 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, } break; case ARM::t2ADDri12: - // If the immediate fits for encoding T3 (t2ADDri) and the generic "add" - // mnemonic was used (not "addw"), encoding T3 is preferred. - if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "add" || - ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) - break; - Inst.setOpcode(ARM::t2ADDri); - Inst.addOperand(MCOperand::createReg(0)); // cc_out - break; case ARM::t2SUBri12: - // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub" - // mnemonic was used (not "subw"), encoding T3 is preferred. - if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "sub" || + case ARM::t2ADDspImm12: + case ARM::t2SUBspImm12: { + // If the immediate fits for encoding T3 and the generic + // mnemonic was used, encoding T3 is preferred. + const StringRef Token = static_cast<ARMOperand &>(*Operands[0]).getToken(); + if ((Token != "add" && Token != "sub") || ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) break; - Inst.setOpcode(ARM::t2SUBri); + switch (Inst.getOpcode()) { + case ARM::t2ADDri12: + Inst.setOpcode(ARM::t2ADDri); + break; + case ARM::t2SUBri12: + Inst.setOpcode(ARM::t2SUBri); + break; + case ARM::t2ADDspImm12: + Inst.setOpcode(ARM::t2ADDspImm); + break; + case ARM::t2SUBspImm12: + Inst.setOpcode(ARM::t2SUBspImm); + break; + } + Inst.addOperand(MCOperand::createReg(0)); // cc_out - break; + return true; + } case ARM::tADDi8: // If the immediate is in the range 0-7, we want tADDi3 iff Rd was // explicitly specified. From the ARM ARM: "Encoding T1 is preferred @@ -9812,6 +9844,24 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, Inst = TmpInst; return true; } + case ARM::t2ADDspImm: + case ARM::t2SUBspImm: { + // Prefer T1 encoding if possible + if (Inst.getOperand(5).getReg() != 0 || HasWideQualifier) + break; + unsigned V = Inst.getOperand(2).getImm(); + if (V & 3 || V > ((1 << 7) - 1) << 2) + break; + MCInst TmpInst; + TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDspImm ? ARM::tADDspi + : ARM::tSUBspi); + TmpInst.addOperand(MCOperand::createReg(ARM::SP)); // destination reg + TmpInst.addOperand(MCOperand::createReg(ARM::SP)); // source reg + TmpInst.addOperand(MCOperand::createImm(V / 4)); // immediate + TmpInst.addOperand(Inst.getOperand(3)); // pred + Inst = TmpInst; + return true; + } case ARM::t2ADDrr: { // If the destination and first source operand are the same, and // there's no setting of the flags, use encoding T2 instead of T3. diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index eabc26d05f4..97a28fa233d 100644 --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -204,6 +204,9 @@ static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo, @@ -566,6 +569,9 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeMVEOverlappingLongShift(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + #include "ARMGenDisassemblerTables.inc" static MCDisassembler *createARMDisassembler(const Target &T, @@ -1231,6 +1237,17 @@ static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo, return S; } +static DecodeStatus DecodeGPRspRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo != 13) + return MCDisassembler::Fail; + + unsigned Register = GPRDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { unsigned Register = 0; @@ -5588,14 +5605,25 @@ static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn, unsigned sign1 = fieldFromInstruction(Insn, 21, 1); unsigned sign2 = fieldFromInstruction(Insn, 23, 1); if (sign1 != sign2) return MCDisassembler::Fail; + const unsigned Rd = fieldFromInstruction(Insn, 8, 4); + assert(Inst.getNumOperands() == 0 && "We should receive an empty Inst"); + DecodeStatus S = DecoderGPRRegisterClass(Inst, Rd, Address, Decoder); unsigned Val = fieldFromInstruction(Insn, 0, 8); Val |= fieldFromInstruction(Insn, 12, 3) << 8; Val |= fieldFromInstruction(Insn, 26, 1) << 11; - Val |= sign1 << 12; - Inst.addOperand(MCOperand::createImm(SignExtend32<13>(Val))); - - return MCDisassembler::Success; + // If sign, then it is decreasing the address. + if (sign1) { + // Following ARMv7 Architecture Manual, when the offset + // is zero, it is decoded as a subw, not as a adr.w + if (!Val) { + Inst.setOpcode(ARM::t2SUBri12); + Inst.addOperand(MCOperand::createReg(ARM::PC)); + } else + Val = -Val; + } + Inst.addOperand(MCOperand::createImm(Val)); + return S; } static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val, @@ -6595,3 +6623,40 @@ static DecodeStatus DecodeMVEVPNOT(MCInst &Inst, unsigned Insn, uint64_t Address Inst.addOperand(MCOperand::createReg(ARM::VPR)); return S; } + +static DecodeStatus DecodeT2AddSubSPImm(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + const unsigned Rd = fieldFromInstruction(Insn, 8, 4); + const unsigned Rn = fieldFromInstruction(Insn, 16, 4); + const unsigned Imm12 = fieldFromInstruction(Insn, 26, 1) << 11 | + fieldFromInstruction(Insn, 12, 3) << 8 | + fieldFromInstruction(Insn, 0, 8); + const unsigned TypeT3 = fieldFromInstruction(Insn, 25, 1); + unsigned sign1 = fieldFromInstruction(Insn, 21, 1); + unsigned sign2 = fieldFromInstruction(Insn, 23, 1); + unsigned S = fieldFromInstruction(Insn, 20, 1); + if (sign1 != sign2) + return MCDisassembler::Fail; + + // T3 does a zext of imm12, where T2 does a ThumbExpandImm (T2SOImm) + DecodeStatus DS = MCDisassembler::Success; + if ((!Check(DS, + DecodeGPRspRegisterClass(Inst, Rd, Address, Decoder))) || // dst + (!Check(DS, DecodeGPRspRegisterClass(Inst, Rn, Address, Decoder)))) + return MCDisassembler::Fail; + if (TypeT3) { + Inst.setOpcode(sign1 ? ARM::t2SUBspImm12 : ARM::t2ADDspImm12); + S = 0; + Inst.addOperand(MCOperand::createImm(Imm12)); // zext imm12 + } else { + Inst.setOpcode(sign1 ? ARM::t2SUBspImm : ARM::t2ADDspImm); + if (!Check(DS, DecodeT2SOImm(Inst, Imm12, Address, Decoder))) // imm12 + return MCDisassembler::Fail; + } + if (!Check(DS, DecodeCCOutOperand(Inst, S, Address, Decoder))) // cc_out + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::createReg(0)); // pred + + return DS; +} diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index dbb1b27a67a..e06bb9546c0 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -319,15 +319,19 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, } bool HasCCOut = true; int ImmIsT2SO = ARM_AM::getT2SOImmVal(ThisVal); - - Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; + bool ToSP = DestReg == ARM::SP; + unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri; + unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri; + unsigned t2SUBi12 = ToSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12; + unsigned t2ADDi12 = ToSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12; + Opc = isSub ? t2SUB : t2ADD; // Prefer T2: sub rd, rn, so_imm | sub sp, sp, so_imm if (ImmIsT2SO != -1) { NumBytes = 0; } else if (ThisVal < 4096) { // Prefer T3 if can make it in a single go: subw rd, rn, imm12 | subw sp, // sp, imm12 - Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + Opc = isSub ? t2SUBi12 : t2ADDi12; HasCCOut = false; NumBytes = 0; } else { @@ -483,7 +487,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR) AddrMode = ARMII::AddrModeT2_i12; // FIXME. mode for thumb2? - if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { + const bool IsSP = Opcode == ARM::t2ADDspImm12 || Opcode == ARM::t2ADDspImm; + if (IsSP || Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); unsigned PredReg; @@ -500,14 +505,14 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return true; } - bool HasCCOut = Opcode != ARM::t2ADDri12; + bool HasCCOut = (Opcode != ARM::t2ADDspImm12 && Opcode != ARM::t2ADDri12); if (Offset < 0) { Offset = -Offset; isSub = true; - MI.setDesc(TII.get(ARM::t2SUBri)); + MI.setDesc(IsSP ? TII.get(ARM::t2SUBspImm) : TII.get(ARM::t2SUBri)); } else { - MI.setDesc(TII.get(ARM::t2ADDri)); + MI.setDesc(IsSP ? TII.get(ARM::t2ADDspImm) : TII.get(ARM::t2ADDri)); } // Common case: small offset, fits into instruction. @@ -523,7 +528,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, // Another common case: imm12. if (Offset < 4096 && (!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) { - unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + unsigned NewOpc = isSub ? IsSP ? ARM::t2SUBspImm12 : ARM::t2SUBri12 + : IsSP ? ARM::t2ADDspImm12 : ARM::t2ADDri12; MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); |