diff options
author | Bernard Ogden <bogden@arm.com> | 2018-08-17 11:29:49 +0000 |
---|---|---|
committer | Bernard Ogden <bogden@arm.com> | 2018-08-17 11:29:49 +0000 |
commit | b828bb2a15c00204514d7a9585ffe743d8858237 (patch) | |
tree | 5611815c44aad414271d169a6f10e0f162ab571d /llvm/lib/Target | |
parent | 6cb07d2bedb9125c317dc13962b0341a4667ba3b (diff) | |
download | bcm5719-llvm-b828bb2a15c00204514d7a9585ffe743d8858237.tar.gz bcm5719-llvm-b828bb2a15c00204514d7a9585ffe743d8858237.zip |
[ARM/AArch64] Support FP16 +fp16fml instructions
Add +fp16fml feature for new FP16 instructions, which are a
mandatory part of FP16 from v8.4-A and an optional part of FP16
from v8.2-A. It doesn't seem to be possible to model this
version-dependent mandatory/optional status in LLVM, so the
relationship between the options is handled by the related clang patch.
In keeping with what I think is the usual practice, the fp16fml
extension is accepted regardless of base architecture version.
Builds on/replaces Sjoerd Meijer's patch to add these instructions at
https://reviews.llvm.org/D49839.
Differential Revision: https://reviews.llvm.org/D50228
llvm-svn: 340013
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrFormats.td | 22 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 20 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64Subtarget.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARM.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrFormats.td | 31 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrInfo.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrNEON.td | 48 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMSubtarget.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 6 |
11 files changed, 142 insertions, 3 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index a69d38144c7..26608469f9a 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -71,6 +71,9 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "Full FP16", [FeatureFPARMv8]>; +def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", + "Enable FP16 FML instructions", [FeatureFullFP16]>; + def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", "Enable Statistical Profiling extension">; diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 7caf32dbde2..123cddd53ba 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -4790,6 +4790,14 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode, let Inst{4-0} = Rd; } +let Predicates = [HasNEON, HasFP16FML] in +class BaseSIMDThreeSameMult<bit Q, bit U, bit b13, bits<3> size, string asm, string kind1, + string kind2> : + BaseSIMDThreeSameVector<Q, U, size, 0b11101, V128, asm, kind1, [] > { + let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); + let Inst{13} = b13; +} + class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1, string kind2, RegisterOperand RegType, ValueType AccumType, ValueType InputType, @@ -7255,6 +7263,20 @@ class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind, let Inst{11} = idx{1}; // H } +let Predicates = [HasNEON, HasFP16FML] in +class BaseSIMDThreeSameMultIndex<bit Q, bit U, bits<4> opc, string asm, + string dst_kind, string lhs_kind, + string rhs_kind> : + BaseSIMDIndexedTied<Q, U, 0, 0b10, opc, V128, V128, V128, + VectorIndexH, asm, "", dst_kind, lhs_kind, + rhs_kind, []> { + //idx = H:L:M + bits<3> idx; + let Inst{11} = idx{2}; // H + 
let Inst{21} = idx{1}; // L + let Inst{20} = idx{0}; // M +} + multiclass SIMDThreeSameVectorDotIndex<bit U, string asm, SDPatternOperator OpNode> { def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d6b8bb5d89c..d89ff41894e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -49,6 +49,8 @@ def HasRDM : Predicate<"Subtarget->hasRDM()">, def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicate<"FeatureFullFP16", "fullfp16">; +def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">, + AssemblerPredicate<"FeatureFP16FML", "fp16fml">; def HasSPE : Predicate<"Subtarget->hasSPE()">, AssemblerPredicate<"FeatureSPE", "spe">; def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">, @@ -3299,6 +3301,24 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", int_aarch64_neon_sqsub>; +// FP16FML +def FMLAL_2S : BaseSIMDThreeSameMult<0, 0, 1, 0b001, "fmlal", ".2s", ".2h">; +def FMLSL_2S : BaseSIMDThreeSameMult<0, 0, 1, 0b101, "fmlsl", ".2s", ".2h">; +def FMLAL_4S : BaseSIMDThreeSameMult<1, 0, 1, 0b001, "fmlal", ".4s", ".4h">; +def FMLSL_4S : BaseSIMDThreeSameMult<1, 0, 1, 0b101, "fmlsl", ".4s", ".4h">; +def FMLAL2_2S : BaseSIMDThreeSameMult<0, 1, 0, 0b001, "fmlal2", ".2s", ".2h">; +def FMLSL2_2S : BaseSIMDThreeSameMult<0, 1, 0, 0b101, "fmlsl2", ".2s", ".2h">; +def FMLAL2_4S : BaseSIMDThreeSameMult<1, 1, 0, 0b001, "fmlal2", ".4s", ".4h">; +def FMLSL2_4S : BaseSIMDThreeSameMult<1, 1, 0, 0b101, "fmlsl2", ".4s", ".4h">; +def FMLALI_2s : BaseSIMDThreeSameMultIndex<0, 0, 0b0000, "fmlal", ".2s", ".2h", ".h">; +def FMLSLI_2s : BaseSIMDThreeSameMultIndex<0, 0, 0b0100, "fmlsl", ".2s", ".2h", ".h">; +def FMLALI_4s : 
BaseSIMDThreeSameMultIndex<1, 0, 0b0000, "fmlal", ".4s", ".4h", ".h">; +def FMLSLI_4s : BaseSIMDThreeSameMultIndex<1, 0, 0b0100, "fmlsl", ".4s", ".4h", ".h">; +def FMLALI2_2s : BaseSIMDThreeSameMultIndex<0, 1, 0b1000, "fmlal2", ".2s", ".2h", ".h">; +def FMLSLI2_2s : BaseSIMDThreeSameMultIndex<0, 1, 0b1100, "fmlsl2", ".2s", ".2h", ".h">; +def FMLALI2_4s : BaseSIMDThreeSameMultIndex<1, 1, 0b1000, "fmlal2", ".4s", ".4h", ".h">; +def FMLSLI2_4s : BaseSIMDThreeSameMultIndex<1, 1, 0b1100, "fmlsl2", ".4s", ".4h", ".h">; + defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 5af4c0dd9c1..6f08ca03381 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -78,6 +78,7 @@ protected: bool HasRDM = false; bool HasPerfMon = false; bool HasFullFP16 = false; + bool HasFP16FML = false; bool HasSPE = false; // ARMv8.4 Crypto extensions @@ -291,6 +292,7 @@ public: bool hasPerfMon() const { return HasPerfMon; } bool hasFullFP16() const { return HasFullFP16; } + bool hasFP16FML() const { return HasFP16FML; } bool hasSPE() const { return HasSPE; } bool hasLSLFast() const { return HasLSLFast; } bool hasSVE() const { return HasSVE; } diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td index 43e28f61d3a..f08afa8a7f0 100644 --- a/llvm/lib/Target/ARM/ARM.td +++ b/llvm/lib/Target/ARM/ARM.td @@ -61,6 +61,11 @@ def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", "floating point", [FeatureFPARMv8]>; +def FeatureFP16FML : SubtargetFeature<"fp16fml", "HasFP16FML", "true", + "Enable full half-precision " + "floating point fml instructions", + [FeatureFullFP16]>; + def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", "Floating point unit supports " "single precision only">; diff --git 
a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td index 70aded247f6..87a28023e97 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -2579,6 +2579,37 @@ class N3VLaneCP8<bit op23, bits<2> op21_20, bit op6, bit op4, let Inst{3-0} = Vm{3-0}; } +// In Armv8.2-A, some NEON instructions are added that encode Vn and Vm +// differently: +// if Q == ‘1’ then UInt(N:Vn) else UInt(Vn:N); +// if Q == ‘1’ then UInt(M:Vm) else UInt(Vm:M); +// Class N3VCP8 above describes the Q=1 case, and this class the Q=0 case. +class N3VCP8Q0<bits<2> op24_23, bits<2> op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc, dt, asm, cstr, pattern> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-25} = 0b1111110; + let Inst{24-23} = op24_23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{4-1}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{0}; + let Inst{6} = op6; + let Inst{5} = Vm{0}; + let Inst{4} = op4; + let Inst{3-0} = Vm{4-1}; +} + // Operand types for complex instructions class ComplexRotationOperand<int Angle, int Remainder, string Type, string Diag> : AsmOperandClass { diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index d4c342cee5c..e6d85be2463 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -285,6 +285,8 @@ def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float conversions">; def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">, AssemblerPredicate<"FeatureFullFP16","full half-float">; +def HasFP16FML : 
Predicate<"Subtarget->hasFP16FML()">, + AssemblerPredicate<"FeatureFP16FML","full half-float fml">; def HasDivideInThumb : Predicate<"Subtarget->hasDivideInThumbMode()">, AssemblerPredicate<"FeatureHWDivThumb", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 5a300c727bb..a7bb32d31f6 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5109,6 +5109,54 @@ def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm", (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>; } +// +fp16fml Floating Point Multiplication Variants +let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in { + +class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn, + RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3> + : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary, + asm, "f16", "$Vd, $Vn, $Vm", "", []>; + +class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn, + RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3> + : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary, + asm, "f16", "$Vd, $Vn, $Vm", "", []>; + +class VFMQ0<string opc, bits<2> S> + : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd), + (ins SPR:$Vn, SPR:$Vm, VectorIndex32:$idx), + IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> { + bit idx; + let Inst{3} = idx; + let Inst{19-16} = Vn{4-1}; + let Inst{7} = Vn{0}; + let Inst{5} = Vm{0}; + let Inst{2-0} = Vm{3-1}; +} + +class VFMQ1<string opc, bits<2> S> + : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd), + (ins DPR:$Vn, DPR:$Vm, VectorIndex16:$idx), + IIC_VMACD, opc, "f16", "$Vd, $Vn, $Vm$idx", "", []> { + bits<2> idx; + let Inst{5} = idx{1}; + let Inst{3} = idx{0}; +} + +let hasNoSchedulingInfo = 1 in { +// op1 op2 op3 +def VFMALD : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>; +def VFMSLD : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>; +def 
VFMALQ : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>; +def VFMSLQ : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>; +def VFMALDI : VFMQ0<"vfmal", 0b00>; +def VFMSLDI : VFMQ0<"vfmsl", 0b01>; +def VFMALQI : VFMQ1<"vfmal", 0b00>; +def VFMSLQI : VFMQ1<"vfmsl", 0b01>; +} +} // HasNEON, HasFP16FML + + def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>; def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm", diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 69bc3eaedfb..d7bfa896193 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -227,6 +227,9 @@ protected: /// HasFullFP16 - True if subtarget supports half-precision FP operations bool HasFullFP16 = false; + /// HasFP16FML - True if subtarget supports half-precision FP fml operations + bool HasFP16FML = false; + /// HasD16 - True if subtarget is limited to 16 double precision /// FP registers for VFPv3. bool HasD16 = false; @@ -622,6 +625,7 @@ public: bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } bool hasFullFP16() const { return HasFullFP16; } + bool hasFP16FML() const { return HasFP16FML; } bool hasFuseAES() const { return HasFuseAES; } bool hasFuseLiterals() const { return HasFuseLiterals; } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h index e0cd2d8e26a..7d14bd7c256 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -57,7 +57,7 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { const FeatureBitset InlineFeatureWhitelist = { ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, - ARM::FeatureFullFP16, ARM::FeatureHWDivThumb, + ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb, ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, 
ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index a5fbbbf26be..da6296a6319 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5626,7 +5626,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || Mnemonic == "bxns" || Mnemonic == "blxns" || Mnemonic == "vudot" || Mnemonic == "vsdot" || - Mnemonic == "vcmla" || Mnemonic == "vcadd") + Mnemonic == "vcmla" || Mnemonic == "vcadd" || + Mnemonic == "vfmal" || Mnemonic == "vfmsl") return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -5716,7 +5717,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || Mnemonic == "vmovx" || Mnemonic == "vins" || Mnemonic == "vudot" || Mnemonic == "vsdot" || - Mnemonic == "vcmla" || Mnemonic == "vcadd") { + Mnemonic == "vcmla" || Mnemonic == "vcadd" || + Mnemonic == "vfmal" || Mnemonic == "vfmsl") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { |