Diffstat (limited to 'llvm/lib/Target')
llvm/lib/Target/ARM/ARMInstrFormats.td                | 134
llvm/lib/Target/ARM/ARMInstrInfo.td                   |  15
llvm/lib/Target/ARM/ARMInstrVFP.td                    | 383
llvm/lib/Target/ARM/ARMSchedule.td                    |  12
llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp        |  44
llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp  |  23
llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp    |  28
llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h      |   3
llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h |  51
llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp    |  41
llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h      |   7
llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp |  49
12 files changed, 784 insertions, 6 deletions
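Before the diff body: the new addrmode5fp16 operand added by this patch stores "reg +/- imm8*2", with the add/sub flag in bit 8 and the offset in halfwords in bits 7-0. The standalone C++ sketch below is not part of the patch; it mirrors the ARM_AM::getAM5FP16Opc/getAM5FP16Offset/getAM5FP16Op helpers added in ARMAddressingModes.h and round-trips one offset. main() and the example offset are illustrative only.

    // Minimal sketch of the addrmode5fp16 operand packing (reg +/- imm8*2).
    #include <cassert>
    #include <cstdio>
    #include <cstdlib>

    enum AddrOpc { add, sub };

    static unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) {
      bool isSub = (Opc == sub);
      return ((unsigned)isSub << 8) | Offset;   // add/sub flag in bit 8
    }
    static unsigned char getAM5FP16Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; }
    static AddrOpc getAM5FP16Op(unsigned AM5Opc) {
      return ((AM5Opc >> 8) & 1) ? sub : add;
    }

    int main() {
      // A byte offset of -6 is stored as (sub, 3): the low bit is always zero
      // and is never encoded (the asm parser divides by 2 before packing).
      int ByteOffs = -6;
      assert((ByteOffs & 1) == 0 && "addrmode5fp16 offsets are multiples of 2");
      unsigned Packed = getAM5FP16Opc(ByteOffs < 0 ? sub : add,
                                      (unsigned char)(std::abs(ByteOffs) / 2));
      // The printer scales the stored halfword count back to bytes, as
      // printAddrMode5FP16Operand does below.
      std::printf("[r0, #%s%u]\n",
                  getAM5FP16Op(Packed) == sub ? "-" : "",
                  getAM5FP16Offset(Packed) * 2u);   // prints "[r0, #-6]"
      return 0;
    }
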
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index e79608d360c..50bb9af71da 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -1495,6 +1495,32 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, let D = VFPNeonDomain; } +class AHI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, + string opc, string asm, list<dag> pattern> + : VFPI<oops, iops, AddrMode5, 4, IndexModeNone, + VFPLdStFrm, itin, opc, asm, "", pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<13> addr; + + // Encode instruction operands. + let Inst{23} = addr{8}; // U (add = (U == '1')) + let Inst{22} = Sd{0}; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Sd{4-1}; + let Inst{7-0} = addr{7-0}; // imm8 + + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; +} + // VFP Load / store multiple pseudo instructions. class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr, list<dag> pattern> @@ -1817,6 +1843,114 @@ class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{22} = Sd{0}; } +// Half precision, unary, predicated +class AHuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list<dag> pattern> + : VFPAI<oops, iops, VFPUnaryFrm, itin, opc, asm, pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Half precision, unary, non-predicated +class AHuInp<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, + VFPUnaryFrm, itin, asm, "", pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Half precision, binary +class AHbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + let Inst{6} = op6; + let Inst{4} = op4; +} + +// Half precision, binary, not predicated +class AHbInp<bits<5> opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list<dag> pattern> + : VFPXI<oops, iops, AddrModeNone, 4, IndexModeNone, + VFPBinaryFrm, itin, asm, "", pattern> { + list<Predicate> Predicates = [HasFullFP16]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1001; // Half precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + // VFP conversion instructions class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, dag oops, dag iops, InstrItinClass itin, string opc, string asm, diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 2aa9475e6f4..292d5d62fc4 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -981,6 +981,21 @@ def addrmode5_pre : AddrMode5 { let PrintMethod = "printAddrMode5Operand<true>"; } +// addrmode5fp16 := reg +/- imm8*2 +// +def AddrMode5FP16AsmOperand : AsmOperandClass { let Name = "AddrMode5FP16"; } +class AddrMode5FP16 : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode5FP16", []> { + let EncoderMethod = "getAddrMode5FP16OpValue"; + let DecoderMethod = "DecodeAddrMode5FP16Operand"; + let ParserMatchClass = AddrMode5FP16AsmOperand; + let MIOperandInfo = (ops GPR:$base, i32imm); +} + +def addrmode5fp16 : AddrMode5FP16 { + let PrintMethod = "printAddrMode5FP16Operand<false>"; +} + // addrmode6 := reg with optional alignment // def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; } diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index 050cd1a445a..8a175fdaefa 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -30,6 +30,18 @@ def FPImmOperand : AsmOperandClass { let ParserMethod = "parseFPImm"; } +def vfp_f16imm : Operand<f16>, + PatLeaf<(f16 fpimm), [{ + return ARM_AM::getFP16Imm(N->getValueAPF()) != -1; + }], SDNodeXForm<fpimm, [{ + APFloat InVal = N->getValueAPF(); + uint32_t enc = ARM_AM::getFP16Imm(InVal); + return CurDAG->getTargetConstant(enc, MVT::i32); + }]>> { + let PrintMethod = "printFPImmOperand"; + let ParserMatchClass = FPImmOperand; +} + def vfp_f32imm : Operand<f32>, PatLeaf<(f32 fpimm), [{ return ARM_AM::getFP32Imm(N->getValueAPF()) != -1; @@ -98,6 +110,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), let D = VFPNeonDomain; } +def VLDRH : AHI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5fp16:$addr), + IIC_fpLoad16, "vldr", ".16\t$Sd, $addr", + []>, + Requires<[HasFullFP16]>; + } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), @@ -112,6 +129,11 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), let D = VFPNeonDomain; } +def VSTRH : AHI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5fp16:$addr), + 
IIC_fpStore16, "vstr", ".16\t$Sd, $addr", + []>, + Requires<[HasFullFP16]>; + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -295,6 +317,12 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VADDH : AHbI<0b11100, 0b11, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm", + []>; + let TwoOperandAliasConstraint = "$Dn = $Dd" in def VSUBD : ADbI<0b11100, 0b11, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -311,6 +339,12 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VSUBH : AHbI<0b11100, 0b11, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm", + []>; + let TwoOperandAliasConstraint = "$Dn = $Dd" in def VDIVD : ADbI<0b11101, 0b00, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -323,6 +357,12 @@ def VDIVS : ASbI<0b11101, 0b00, 0, 0, IIC_fpDIV32, "vdiv", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fdiv SPR:$Sn, SPR:$Sm))]>; +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VDIVH : AHbI<0b11101, 0b00, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpDIV16, "vdiv", ".f16\t$Sd, $Sn, $Sm", + []>; + let TwoOperandAliasConstraint = "$Dn = $Dd" in def VMULD : ADbI<0b11100, 0b10, 0, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), @@ -339,6 +379,12 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0, let D = VFPNeonA8Domain; } +let TwoOperandAliasConstraint = "$Sn = $Sd" in +def VMULH : AHbI<0b11100, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL16, "vmul", ".f16\t$Sd, $Sn, $Sm", + []>; + def VNMULD : ADbI<0b11100, 0b10, 1, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), IIC_fpMUL64, "vnmul", ".f64\t$Dd, $Dn, $Dm", @@ -353,9 +399,20 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, let D = VFPNeonA8Domain; } +def VNMULH : AHbI<0b11100, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + IIC_fpMUL16, "vnmul", ".f16\t$Sd, $Sn, $Sm", + []>; + multiclass vsel_inst<string op, bits<2> opc, int CC> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "", Uses = [CPSR], AddedComplexity = 4 in { + def H : AHbInp<0b11100, opc, 0, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"), + []>, + Requires<[HasFullFP16]>; + def S : ASbInp<0b11100, opc, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), @@ -378,6 +435,12 @@ defm VSELVS : vsel_inst<"vs", 0b01, 6>; multiclass vmaxmin_inst<string op, bit opc, SDNode SD> { let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in { + def H : AHbInp<0b11101, 0b00, opc, + (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), + NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"), + []>, + Requires<[HasFullFP16]>; + def S : ASbInp<0b11101, 0b00, opc, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat(op, ".f32\t$Sd, $Sn, $Sm"), @@ -418,6 +481,12 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, let D = VFPNeonA8Domain; } +def VCMPEH : AHuI<0b11101, 0b11, 0b0100, 0b11, 0, + (outs), (ins SPR:$Sd, SPR:$Sm), + IIC_fpCMP16, "vcmpe", ".f16\t$Sd, $Sm", + []>; + + // FIXME: Verify encoding after integrated assembler is working. def VCMPD : ADuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins DPR:$Dd, DPR:$Dm), @@ -432,6 +501,11 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, // VFP pipelines on A8. 
let D = VFPNeonA8Domain; } + +def VCMPH : AHuI<0b11101, 0b11, 0b0100, 0b01, 0, + (outs), (ins SPR:$Sd, SPR:$Sm), + IIC_fpCMP16, "vcmp", ".f16\t$Sd, $Sm", + []>; } // Defs = [FPSCR_NZCV] //===----------------------------------------------------------------------===// @@ -452,6 +526,11 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, let D = VFPNeonA8Domain; } +def VABSH : AHuI<0b11101, 0b11, 0b0000, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vabs", ".f16\t$Sd, $Sm", + []>; + let Defs = [FPSCR_NZCV] in { def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0, (outs), (ins DPR:$Dd), @@ -473,6 +552,14 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, let D = VFPNeonA8Domain; } +def VCMPEZH : AHuI<0b11101, 0b11, 0b0101, 0b11, 0, + (outs), (ins SPR:$Sd), + IIC_fpCMP16, "vcmpe", ".f16\t$Sd, #0", + []> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; +} + // FIXME: Verify encoding after integrated assembler is working. def VCMPZD : ADuI<0b11101, 0b11, 0b0101, 0b01, 0, (outs), (ins DPR:$Dd), @@ -493,6 +580,14 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, // VFP pipelines on A8. let D = VFPNeonA8Domain; } + +def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0, + (outs), (ins SPR:$Sd), + IIC_fpCMP16, "vcmp", ".f16\t$Sd, #0", + []> { + let Inst{3-0} = 0b0000; + let Inst{5} = 0; +} } // Defs = [FPSCR_NZCV] def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, @@ -627,6 +722,22 @@ def : Pat<(f64 (f16_to_fp GPR:$a)), multiclass vcvt_inst<string opc, bits<2> rm, SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def SH : AHuInp<0b11101, 0b11, 0b1100, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".s32.f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } + + def UH : AHuInp<0b11101, 0b11, 0b1100, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vcvt", opc, ".u32.f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } + def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), @@ -715,7 +826,21 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } +def VNEGH : AHuI<0b11101, 0b11, 0b0001, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vneg", ".f16\t$Sd, $Sm", + []>; + multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { + def H : AHuI<0b11101, 0b11, 0b0110, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, !strconcat("vrint", opc), ".f16\t$Sd, $Sm", + []>, + Requires<[HasFullFP16]> { + let Inst{7} = op2; + let Inst{16} = op; + } + def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", @@ -733,6 +858,9 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { let Inst{16} = op; } + def : InstAlias<!strconcat("vrint", opc, "$p.f16.f16\t$Sd, $Sm"), + (!cast<Instruction>(NAME#"H") SPR:$Sd, SPR:$Sm, pred:$p)>, + Requires<[HasFullFP16]>; def : InstAlias<!strconcat("vrint", opc, "$p.f32.f32\t$Sd, $Sm"), (!cast<Instruction>(NAME#"S") SPR:$Sd, SPR:$Sm, pred:$p)>, Requires<[HasFPARMv8]>; @@ -748,6 +876,13 @@ defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>; multiclass vrint_inst_anpm<string opc, bits<2> rm, SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { + def H : AHuInp<0b11101, 0b11, 0b1000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + NoItinerary, 
!strconcat("vrint", opc, ".f16\t$Sd, $Sm"), + []>, + Requires<[HasFullFP16]> { + let Inst{17-16} = rm; + } def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), @@ -787,6 +922,11 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; +def VSQRTH : AHuI<0b11101, 0b11, 0b0001, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpSQRT16, "vsqrt", ".f16\t$Sd, $Sm", + []>; + let hasSideEffects = 0 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -795,6 +935,18 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>; + +let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { +def VMOVH : ASuInp<0b11101, 0b11, 0b0000, 0b01, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vmovx.f16\t$Sd, $Sm", []>, + Requires<[HasFullFP16]>; + +def VINSH : ASuInp<0b11101, 0b11, 0b0000, 0b11, 0, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpUNA16, "vins.f16\t$Sd, $Sm", []>, + Requires<[HasFullFP16]>; +} // PostEncoderMethod } // hasSideEffects //===----------------------------------------------------------------------===// @@ -966,6 +1118,44 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, let DecoderMethod = "DecodeVMOVSRR"; } +// Move H->R, clearing top 16 bits +def VMOVRH : AVConv2I<0b11100001, 0b1001, + (outs GPR:$Rt), (ins SPR:$Sn), + IIC_fpMOVSI, "vmov", ".f16\t$Rt, $Sn", + []>, + Requires<[HasFullFP16]> { + // Instruction operands. + bits<4> Rt; + bits<5> Sn; + + // Encode instruction operands. + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Rt; + + let Inst{6-5} = 0b00; + let Inst{3-0} = 0b0000; +} + +// Move R->H, clearing top 16 bits +def VMOVHR : AVConv4I<0b11100000, 0b1001, + (outs SPR:$Sn), (ins GPR:$Rt), + IIC_fpMOVIS, "vmov", ".f16\t$Sn, $Rt", + []>, + Requires<[HasFullFP16]> { + // Instruction operands. + bits<5> Sn; + bits<4> Rt; + + // Encode instruction operands. + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Rt; + + let Inst{6-5} = 0b00; + let Inst{3-0} = 0b0000; +} + // FMRDH: SPR -> GPR // FMRDL: SPR -> GPR // FMRRS: SPR -> GPR @@ -1011,6 +1201,25 @@ class AVConv1InSs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Sd{0}; } +class AVConv1IHs_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Predicates = [HasFullFP16]; +} + def VSITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.s32\t$Dd, $Sm", @@ -1043,6 +1252,13 @@ def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)), def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VSITOS (VLDRS addrmode5:$a))>; +def VSITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTIH, "vcvt", ".f16.s32\t$Sd, $Sm", + []> { + let Inst{7} = 1; // s32 +} + def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, (outs DPR:$Dd), (ins SPR:$Sm), IIC_fpCVTID, "vcvt", ".f64.u32\t$Dd, $Sm", @@ -1075,6 +1291,13 @@ def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)), def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))), (VUITOS (VLDRS addrmode5:$a))>; +def VUITOH : AVConv1IHs_Encode<0b11101, 0b11, 0b1000, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTIH, "vcvt", ".f16.u32\t$Sd, $Sm", + []> { + let Inst{7} = 0; // u32 +} + // FP -> Int: class AVConv1IsD_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, @@ -1113,6 +1336,25 @@ class AVConv1InsS_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, let Inst{22} = Sd{0}; } +class AVConv1IsH_Encode<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, + bits<4> opcod4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, + list<dag> pattern> + : AVConv1I<opcod1, opcod2, opcod3, opcod4, oops, iops, itin, opc, asm, + pattern> { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Predicates = [HasFullFP16]; +} + // Always set Z bit in the instruction, i.e. "round towards zero" variants. def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), @@ -1147,6 +1389,13 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>; +def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvt", ".s32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 1; // Z bit +} + def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvt", ".u32.f64\t$Sd, $Dm", @@ -1180,6 +1429,13 @@ def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr), (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>; +def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvt", ".u32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 1; // Z bit +} + // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. let Uses = [FPSCR] in { // FIXME: Verify encoding after integrated assembler is working. 
@@ -1197,6 +1453,13 @@ def VTOSIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, let Inst{7} = 0; // Z bit } +def VTOSIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvtr", ".s32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 0; // Z bit +} + def VTOUIRD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, (outs SPR:$Sd), (ins DPR:$Dm), IIC_fpCVTDI, "vcvtr", ".u32.f64\t$Sd, $Dm", @@ -1210,6 +1473,13 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, [(set SPR:$Sd, (int_arm_vcvtru SPR:$Sm))]> { let Inst{7} = 0; // Z bit } + +def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, + (outs SPR:$Sd), (ins SPR:$Sm), + IIC_fpCVTHI, "vcvtr", ".u32.f16\t$Sd, $Sm", + []> { + let Inst{7} = 0; // Z bit +} } // Convert between floating-point and fixed-point @@ -1249,6 +1519,26 @@ class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, let Predicates = [HasVFP2, HasDPVFP]; } +def VTOSHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTHI, "vcvt", ".s16.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOUHH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTHI, "vcvt", ".u16.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOSLH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTHI, "vcvt", ".s32.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VTOULH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTHI, "vcvt", ".u32.f16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> { @@ -1299,6 +1589,26 @@ def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1, // Fixed-Point to FP: +def VSHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.s16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VUHTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 0, + (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.u16\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VSLTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.s32\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + +def VULTOH : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1001, 1, + (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits), + IIC_fpCVTIH, "vcvt", ".f16.u32\t$dst, $a, $fbits", []>, + Requires<[HasFullFP16]>; + def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> { @@ -1373,6 +1683,13 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, let D = VFPNeonA8Domain; } +def VMLAH : AHbI<0b11100, 0b00, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vmla", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1400,6 
+1717,13 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, let D = VFPNeonA8Domain; } +def VMLSH : AHbI<0b11100, 0b00, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vmls", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1427,6 +1751,13 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, let D = VFPNeonA8Domain; } +def VNMLAH : AHbI<0b11100, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vnmla", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1453,6 +1784,13 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, let D = VFPNeonA8Domain; } +def VNMLSH : AHbI<0b11100, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1482,6 +1820,13 @@ def VFMAS : ASbIn<0b11101, 0b10, 0, 0, // VFP pipelines. } +def VFMAH : AHbI<0b11101, 0b10, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfma", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1517,6 +1862,13 @@ def VFMSS : ASbIn<0b11101, 0b10, 1, 0, // VFP pipelines. } +def VFMSH : AHbI<0b11101, 0b10, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfms", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1559,6 +1911,13 @@ def VFNMAS : ASbI<0b11101, 0b01, 1, 0, // VFP pipelines. } +def VFNMAH : AHbI<0b11101, 0b01, 1, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfnma", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1600,6 +1959,13 @@ def VFNMSS : ASbI<0b11101, 0b01, 0, 0, // VFP pipelines. 
} +def VFNMSH : AHbI<0b11101, 0b01, 0, 0, + (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm), + IIC_fpFMAC16, "vfnms", ".f16\t$Sd, $Sn, $Sm", + []>, + RegConstraint<"$Sdin = $Sd">, + Requires<[HasFullFP16,UseFusedMAC]>; + def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP4,HasDPVFP,UseFusedMAC]>; @@ -1780,6 +2146,23 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), let Inst{7-4} = 0b0000; let Inst{3-0} = imm{3-0}; } + +def FCONSTH : VFPAI<(outs SPR:$Sd), (ins vfp_f16imm:$imm), + VFPMiscFrm, IIC_fpUNA16, + "vmov", ".f16\t$Sd, $imm", + []>, Requires<[HasFullFP16]> { + bits<5> Sd; + bits<8> imm; + + let Inst{27-23} = 0b11101; + let Inst{22} = Sd{0}; + let Inst{21-20} = 0b11; + let Inst{19-16} = imm{7-4}; + let Inst{15-12} = Sd{4-1}; + let Inst{11-8} = 0b1001; // Half precision + let Inst{7-4} = 0b0000; + let Inst{3-0} = imm{3-0}; +} } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td index 528c4ec7378..c485e5111be 100644 --- a/llvm/lib/Target/ARM/ARMSchedule.td +++ b/llvm/lib/Target/ARM/ARMSchedule.td @@ -186,38 +186,50 @@ def IIC_iStore_mu : InstrItinClass; def IIC_Preload : InstrItinClass; def IIC_Br : InstrItinClass; def IIC_fpSTAT : InstrItinClass; +def IIC_fpUNA16 : InstrItinClass; def IIC_fpUNA32 : InstrItinClass; def IIC_fpUNA64 : InstrItinClass; +def IIC_fpCMP16 : InstrItinClass; def IIC_fpCMP32 : InstrItinClass; def IIC_fpCMP64 : InstrItinClass; def IIC_fpCVTSD : InstrItinClass; def IIC_fpCVTDS : InstrItinClass; def IIC_fpCVTSH : InstrItinClass; def IIC_fpCVTHS : InstrItinClass; +def IIC_fpCVTIH : InstrItinClass; def IIC_fpCVTIS : InstrItinClass; def IIC_fpCVTID : InstrItinClass; +def IIC_fpCVTHI : InstrItinClass; def IIC_fpCVTSI : InstrItinClass; def IIC_fpCVTDI : InstrItinClass; def IIC_fpMOVIS : InstrItinClass; def IIC_fpMOVID : InstrItinClass; def IIC_fpMOVSI : InstrItinClass; def IIC_fpMOVDI : InstrItinClass; +def IIC_fpALU16 : InstrItinClass; def IIC_fpALU32 : InstrItinClass; def IIC_fpALU64 : InstrItinClass; +def IIC_fpMUL16 : InstrItinClass; def IIC_fpMUL32 : InstrItinClass; def IIC_fpMUL64 : InstrItinClass; +def IIC_fpMAC16 : InstrItinClass; def IIC_fpMAC32 : InstrItinClass; def IIC_fpMAC64 : InstrItinClass; +def IIC_fpFMAC16 : InstrItinClass; def IIC_fpFMAC32 : InstrItinClass; def IIC_fpFMAC64 : InstrItinClass; +def IIC_fpDIV16 : InstrItinClass; def IIC_fpDIV32 : InstrItinClass; def IIC_fpDIV64 : InstrItinClass; +def IIC_fpSQRT16 : InstrItinClass; def IIC_fpSQRT32 : InstrItinClass; def IIC_fpSQRT64 : InstrItinClass; +def IIC_fpLoad16 : InstrItinClass; def IIC_fpLoad32 : InstrItinClass; def IIC_fpLoad64 : InstrItinClass; def IIC_fpLoad_m : InstrItinClass; def IIC_fpLoad_mu : InstrItinClass; +def IIC_fpStore16 : InstrItinClass; def IIC_fpStore32 : InstrItinClass; def IIC_fpStore64 : InstrItinClass; def IIC_fpStore_m : InstrItinClass; diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 73f33087756..72c98f01b38 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1183,6 +1183,20 @@ public: return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) || Val == INT32_MIN; } + bool isAddrMode5FP16() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. 
If it is a constant, it's something else + // and we reject it. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; + if (!isMem() || Memory.Alignment != 0) return false; + // Check for register offset. + if (Memory.OffsetRegNum) return false; + // Immediate offset in range [-510, 510] and a multiple of 2. + if (!Memory.OffsetImm) return true; + int64_t Val = Memory.OffsetImm->getValue(); + return (Val >= -510 && Val <= 510 && ((Val & 1) == 0)) || Val == INT32_MIN; + } bool isMemTBB() const { if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) @@ -2145,6 +2159,28 @@ public: Inst.addOperand(MCOperand::createImm(Val)); } + void addAddrMode5FP16Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::createExpr(getImm())); + Inst.addOperand(MCOperand::createImm(0)); + return; + } + + // The lower bit is always zero and as such is not encoded. + int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 2 : 0; + ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; + // Special case for #-0 + if (Val == INT32_MIN) Val = 0; + if (Val < 0) Val = -Val; + Val = ARM_AM::getAM5FP16Opc(AddSub, Val); + Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum)); + Inst.addOperand(MCOperand::createImm(Val)); + } + void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); // If we have an immediate that's not a constant, treat it as a label @@ -4973,7 +5009,8 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { // vmov.i{8|16|32|64} <dreg|qreg>, #imm ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]); bool isVmovf = TyOp.isToken() && - (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64"); + (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64" || + TyOp.getToken() == ".f16"); ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]); bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" || Mnemonic.getToken() == "fconsts"); @@ -5265,7 +5302,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || - Mnemonic.startswith("vsel")) + Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx") return Mnemonic; // First, split out any predication code. 
Ignore mnemonics we know aren't @@ -5369,7 +5406,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || - (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { + (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || + Mnemonic == "vmovx" || Mnemonic == "vins") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8042fcd1301..a05111e4ceb 100644 --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -222,6 +222,8 @@ static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn, @@ -2183,6 +2185,7 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, DecodeStatus S = MCDisassembler::Success; unsigned Rn = fieldFromInstruction(Val, 9, 4); + // U == 1 to add imm, 0 to subtract it. unsigned U = fieldFromInstruction(Val, 8, 1); unsigned imm = fieldFromInstruction(Val, 0, 8); @@ -2197,6 +2200,26 @@ static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val, return S; } +static DecodeStatus DecodeAddrMode5FP16Operand(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + DecodeStatus S = MCDisassembler::Success; + + unsigned Rn = fieldFromInstruction(Val, 9, 4); + // U == 1 to add imm, 0 to subtract it. + unsigned U = fieldFromInstruction(Val, 8, 1); + unsigned imm = fieldFromInstruction(Val, 0, 8); + + if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + + if (U) + Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5FP16Opc(ARM_AM::add, imm))); + else + Inst.addOperand(MCOperand::createImm(ARM_AM::getAM5FP16Opc(ARM_AM::sub, imm))); + + return S; +} + static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { return DecodeGPRRegisterClass(Inst, Val, Address, Decoder); diff --git a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index c639540b6c8..11330877c0e 100644 --- a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -644,6 +644,34 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, O << "]" << markup(">"); } +template <bool AlwaysPrintImm0> +void ARMInstPrinter::printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, OpNum, STI, O); + return; + } + + O << markup("<mem:") << "["; + printRegName(O, MO1.getReg()); + + unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm()); + unsigned Op = ARM_AM::getAM5FP16Op(MO2.getImm()); + if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) { + O << ", " + << markup("<imm:") + << "#" + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5FP16Op(MO2.getImm())) + << ImmOffs * 2 + << markup(">"); + } + O << "]" << markup(">"); +} + void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3927c9f8bfd..03db55569a2 100644 --- a/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -74,6 +74,9 @@ public: template <bool AlwaysPrintImm0> void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template <bool AlwaysPrintImm0> + void printAddrMode5FP16Operand(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index b03cada9a64..3959eab966a 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -486,7 +486,7 @@ namespace ARM_AM { // addrmode5 := reg +/- imm8*4 // // The first operand is always a Reg. The second operand encodes the - // operation in bit 8 and the immediate in bits 0-7. + // operation (add or subtract) in bit 8 and the immediate in bits 0-7. /// getAM5Opc - This function encodes the addrmode5 opc field. static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { @@ -501,6 +501,29 @@ namespace ARM_AM { } //===--------------------------------------------------------------------===// + // Addressing Mode #5 FP16 + //===--------------------------------------------------------------------===// + // + // This is used for coprocessor instructions, such as 16-bit FP load/stores. + // + // addrmode5fp16 := reg +/- imm8*2 + // + // The first operand is always a Reg. The second operand encodes the + // operation (add or subtract) in bit 8 and the immediate in bits 0-7. + + /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field. + static inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) { + bool isSub = Opc == sub; + return ((int)isSub << 8) | Offset; + } + static inline unsigned char getAM5FP16Offset(unsigned AM5Opc) { + return AM5Opc & 0xFF; + } + static inline AddrOpc getAM5FP16Op(unsigned AM5Opc) { + return ((AM5Opc >> 8) & 1) ? sub : add; + } + + //===--------------------------------------------------------------------===// // Addressing Mode #6 //===--------------------------------------------------------------------===// // @@ -650,6 +673,32 @@ namespace ARM_AM { return FPUnion.F; } + /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit + /// floating-point value. If the value cannot be represented as an 8-bit + /// floating-point value, then return -1. 
+ static inline int getFP16Imm(const APInt &Imm) { + uint32_t Sign = Imm.lshr(15).getZExtValue() & 1; + int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15 + int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0x3f) + return -1; + Mantissa >>= 6; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; + } + + static inline int getFP16Imm(const APFloat &FPImm) { + return getFP16Imm(FPImm.bitcastToAPInt()); + } + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index fa52c9354c1..9b60ce57b90 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -62,6 +62,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_9", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_thumb_adr_pcrel_10", 0, 8, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, @@ -105,6 +109,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { {"fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_9", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_9", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, {"fixup_thumb_adr_pcrel_10", 8, 8, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, @@ -624,6 +632,37 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } + case ARM::fixup_arm_pcrel_9: + Value = Value - 4; // ARM fixups offset by an additional word and don't + // need to adjust for the half-word ordering. + // Fall through. + case ARM::fixup_t2_pcrel_9: { + // Offset by 4, adjusted by two due to the half-word ordering of thumb. + Value = Value - 4; + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + // These values don't encode the low bit since it's always zero. + if (Ctx && (Value & 1)) { + Ctx->reportError(Fixup.getLoc(), "invalid value for this fixup"); + return 0; + } + Value >>= 1; + if (Ctx && Value >= 256) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } + Value |= isAdd << 23; + + // Same addressing mode as fixup_arm_pcrel_9, but with 16-bit halfwords + // swapped. 
+ if (Kind == ARM::fixup_t2_pcrel_9) + return swapHalfWords(Value, IsLittleEndian); + + return Value; + } } } @@ -695,6 +734,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_arm_pcrel_10_unscaled: case ARM::fixup_arm_ldst_pcrel_12: case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_pcrel_9: case ARM::fixup_arm_adr_pcrel_12: case ARM::fixup_arm_uncondbl: case ARM::fixup_arm_condbl: @@ -708,6 +748,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_t2_condbranch: case ARM::fixup_t2_uncondbranch: case ARM::fixup_t2_pcrel_10: + case ARM::fixup_t2_pcrel_9: case ARM::fixup_t2_adr_pcrel_12: case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index 46ba57170db..51dbe1449b6 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -33,6 +33,13 @@ enum Fixups { // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for // the short-swapped encoding of Thumb2 instructions. fixup_t2_pcrel_10, + // fixup_arm_pcrel_9 - 9-bit PC relative relocation for symbol addresses + // used in VFP instructions where bit 0 not encoded (so it's encoded as an + // 8-bit immediate). + fixup_arm_pcrel_9, + // fixup_t2_pcrel_9 - Equivalent to fixup_arm_pcrel_9, accounting for + // the short-swapped encoding of Thumb2 instructions. + fixup_t2_pcrel_9, // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol // addresses where the lower 2 bits are not encoded (so it's encoded as an // 8-bit immediate). diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index b88578309f0..a8635ff3403 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -255,11 +255,16 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; - /// getAddrMode5OpValue - Return encoding info for 'reg +/- imm8' operand. + /// getAddrMode5OpValue - Return encoding info for 'reg +/- (imm8 << 2)' operand. uint32_t getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const; + /// getAddrMode5FP16OpValue - Return encoding info for 'reg +/- (imm8 << 1)' operand. + uint32_t getAddrMode5FP16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + /// getCCOutOpValue - Return encoding of the 's' bit. unsigned getCCOutOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, @@ -1252,7 +1257,7 @@ getAddrModePCOpValue(const MCInst &MI, unsigned OpIdx, return (MO.getImm() >> 2); } -/// getAddrMode5OpValue - Return encoding info for 'reg +/- imm10' operand. +/// getAddrMode5OpValue - Return encoding info for 'reg +/- (imm8 << 2)' operand. uint32_t ARMMCCodeEmitter:: getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, @@ -1292,6 +1297,46 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx, return Binary; } +/// getAddrMode5FP16OpValue - Return encoding info for 'reg +/- (imm8 << 1)' operand. 
+uint32_t ARMMCCodeEmitter:: +getAddrMode5FP16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + // {12-9} = reg + // {8} = (U)nsigned (add == '1', sub == '0') + // {7-0} = imm8 + unsigned Reg, Imm8; + bool isAdd; + // If The first operand isn't a register, we have a label reference. + const MCOperand &MO = MI.getOperand(OpIdx); + if (!MO.isReg()) { + Reg = CTX.getRegisterInfo()->getEncodingValue(ARM::PC); // Rn is PC. + Imm8 = 0; + isAdd = false; // 'U' bit is handled as part of the fixup. + + assert(MO.isExpr() && "Unexpected machine operand type!"); + const MCExpr *Expr = MO.getExpr(); + MCFixupKind Kind; + if (isThumb2(STI)) + Kind = MCFixupKind(ARM::fixup_t2_pcrel_9); + else + Kind = MCFixupKind(ARM::fixup_arm_pcrel_9); + Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc())); + + ++MCNumCPRelocations; + } else { + EncodeAddrModeOpValues(MI, OpIdx, Reg, Imm8, Fixups, STI); + isAdd = ARM_AM::getAM5Op(Imm8) == ARM_AM::add; + } + + uint32_t Binary = ARM_AM::getAM5Offset(Imm8); + // Immediate is always encoded as positive. The 'U' bit controls add vs sub. + if (isAdd) + Binary |= (1 << 8); + Binary |= (Reg << 9); + return Binary; +} + unsigned ARMMCCodeEmitter:: getSORegRegOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups, |
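
The other self-contained piece of encoding logic in this patch is ARM_AM::getFP16Imm, which decides whether a half-precision constant fits the 8-bit VFP immediate used by the new FCONSTH (vmov.f16 #imm). The sketch below restates that check on a raw IEEE-754 binary16 bit pattern instead of llvm::APInt, keeping the same field widths and limits as the code in the diff; the driver in main() and the sample constants are illustrative, not from the patch.

    #include <cstdint>
    #include <cstdio>

    // Returns the 8-bit VFP immediate encoding, or -1 if the constant is not
    // representable (4 fraction bits, 3 exponent bits), mirroring getFP16Imm.
    static int getFP16Imm(uint16_t Bits) {
      uint32_t Sign = (Bits >> 15) & 1;
      int32_t Exp = ((Bits >> 10) & 0x1f) - 15;   // unbiased exponent
      uint32_t Mantissa = Bits & 0x3ff;           // 10 fraction bits

      // Only the top 4 fraction bits are encodable:
      // mantissa = (16 + UInt(e:f:g:h)) / 16.
      if (Mantissa & 0x3f)
        return -1;
      Mantissa >>= 6;

      // Only 3 bits of exponent: exp == UInt(NOT(b):c:d) - 3.
      if (Exp < -3 || Exp > 4)
        return -1;
      Exp = ((Exp + 3) & 0x7) ^ 4;

      return (int)((Sign << 7) | ((uint32_t)Exp << 4) | Mantissa);
    }

    int main() {
      std::printf("1.0  -> %d\n", getFP16Imm(0x3C00));  // 112 (0x70): encodable
      std::printf("-2.5 -> %d\n", getFP16Imm(0xC100));  // 132 (0x84): encodable
      std::printf("0.1  -> %d\n", getFP16Imm(0x2E66));  // -1: too many fraction bits
      return 0;
    }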