diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrFormats.td | 28 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrNEON.td | 318 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 10 |
4 files changed, 327 insertions, 31 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index b0aadfb031a..2c8cc6bc8ec 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1359,5 +1359,33 @@ class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode, // Inherit Rd in 4-0 } +// Format AdvSIMD scalar x indexed element +class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo, + bits<4> opcode, dag outs, dag ins, + string asmstr, list<dag> patterns, + InstrItinClass itin> + : A64InstRdnm<outs, ins, asmstr, patterns, itin> +{ + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11111; + let Inst{23} = szhi; + let Inst{22} = szlo; + // l in Inst{21} + // m in Instr{20} + // Inherit Rm in 19-16 + let Inst{15-12} = opcode; + // h in Inst{11} + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} +// Format AdvSIMD scalar copy - insert from element to scalar +class NeonI_ScalarCopy<dag outs, dag ins, string asmstr, + list<dag> patterns, InstrItinClass itin> + : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> { + let Inst{28} = 0b1; +} } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index ae217f9d4fd..23d81fc478e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1278,7 +1278,7 @@ def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)), // UBFX makes sense as an implementation of a 64-bit zero-extension too. Could // use either 64-bit or 32-bit variant, but 32-bit might be more efficient. -def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), +def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), sub_32)>; //===------------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/llvm/lib/Target/AArch64/AArch64InstrNEON.td index 95e54f2fcb3..83bb1fa6541 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrNEON.td +++ b/llvm/lib/Target/AArch64/AArch64InstrNEON.td @@ -4671,6 +4671,294 @@ defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm, defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm, int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>; +def neon_uimm0_bare : Operand<i64>, + ImmLeaf<i64, [{return Imm == 0;}]> { + let ParserMatchClass = neon_uimm0_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm1_bare : Operand<i64>, + ImmLeaf<i64, [{(void)Imm; return true;}]> { + let ParserMatchClass = neon_uimm1_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm2_bare : Operand<i64>, + ImmLeaf<i64, [{(void)Imm; return true;}]> { + let ParserMatchClass = neon_uimm2_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm3_bare : Operand<i64>, + ImmLeaf<i64, [{(void)Imm; return true;}]> { + let ParserMatchClass = uimm3_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm4_bare : Operand<i64>, + ImmLeaf<i64, [{(void)Imm; return true;}]> { + let ParserMatchClass = uimm4_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + + +// Scalar by element Arithmetic + +class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode, + string rmlane, bit u, bit szhi, bit szlo, + RegisterClass ResFPR, RegisterClass OpFPR, + RegisterOperand OpVPR, Operand OpImm> + : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode, + (outs ResFPR:$Rd), + (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), + asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", + [], + NoItinerary> { + bits<3> Imm; + bits<5> MRm; +} + +class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode, + string rmlane, + bit u, bit szhi, bit szlo, + RegisterClass ResFPR, + RegisterClass OpFPR, + RegisterOperand OpVPR, + Operand OpImm> + : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode, + (outs ResFPR:$Rd), + (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm), + asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]", + [], + NoItinerary> { + let Constraints = "$src = $Rd"; + bits<3> Imm; + bits<5> MRm; +} + +// Scalar Floating Point multiply (scalar, by element) +def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul", + 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul", + 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Floating Point multiply extended (scalar, by element) +def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx", + 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx", + 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Floating Point fused multiply-add (scalar, by element) +def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", + 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", + 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Floating Point fused multiply-subtract (scalar, by element) +def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", + 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", + 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating doubling multiply-add long (scalar, by element) +def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating doubling +// multiply-subtract long (scalar, by element) +def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating doubling multiply long (scalar, by element) +def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating doubling multiply returning +// high half (scalar, by element) +def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating rounding doubling multiply +// returning high half (scalar, by element) +def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + + +// Scalar Copy - DUP element to scalar +class NeonI_Scalar_DUP<string asmop, string asmlane, + RegisterClass ResRC, RegisterOperand VPRC, + Operand OpImm> + : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm), + asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]", + [], + NoItinerary> { + bits<4> Imm; +} + +def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} +def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> { + let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; +} //===----------------------------------------------------------------------===// @@ -4792,36 +5080,6 @@ def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; -def neon_uimm0_bare : Operand<i64>, - ImmLeaf<i64, [{return Imm == 0;}]> { - let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm1_bare : Operand<i64>, - ImmLeaf<i64, [{(void)Imm; return true;}]> { - let ParserMatchClass = neon_uimm1_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm2_bare : Operand<i64>, - ImmLeaf<i64, [{(void)Imm; return true;}]> { - let ParserMatchClass = neon_uimm2_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm3_bare : Operand<i64>, - ImmLeaf<i64, [{(void)Imm; return true;}]> { - let ParserMatchClass = uimm3_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm4_bare : Operand<i64>, - ImmLeaf<i64, [{(void)Imm; return true;}]> { - let ParserMatchClass = uimm4_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - def neon_uimm3 : Operand<i64>, ImmLeaf<i64, [{(void)Imm; return true;}]> { let ParserMatchClass = uimm3_asmoperand; diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 38845b6d63e..c4f30628c31 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -82,6 +82,8 @@ static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); @@ -379,6 +381,14 @@ DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static DecodeStatus +DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + + return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder); +} static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, |