diff options
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrFormats.td | 124 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 26 |
2 files changed, 78 insertions, 72 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index d1e189362f0..aef0a7af500 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -4941,46 +4941,6 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode, let Inst{4-0} = Rd; } -// ARMv8.2 Fused Multiply Add Long Instructions (Vector) -class BaseSIMDThreeSameMult<bit Q, bit U, bit b13, bits<3> size, string asm, string kind1, - string kind2, RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : - BaseSIMDThreeSameVectorTied<Q, U, size, 0b11101, RegType, asm, kind1, - [(set (AccumType RegType:$dst), - (OpNode (AccumType RegType:$Rd), - (InputType RegType:$Rn), - (InputType RegType:$Rm)))]> { - let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); - let Inst{13} = b13; -} - -multiclass SIMDThreeSameMult<bit U, bit b13, bits<3> size, string asm, SDPatternOperator OpNode> { - def v4f16 : BaseSIMDThreeSameMult<0, U, b13, size, asm, ".2s", ".2h", V64, - v2f32, v4f16, OpNode>; - def v8f16 : BaseSIMDThreeSameMult<1, U, b13, size, asm, ".4s", ".4h", V128, - v4f32, v8f16, OpNode>; -} - -class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1, - string kind2, RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : - BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1, - [(set (AccumType RegType:$dst), - (OpNode (AccumType RegType:$Rd), - (InputType RegType:$Rn), - (InputType RegType:$Rm)))]> { - let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); -} - -multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64, - v2i32, v8i8, OpNode>; - def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128, - v4i32, v16i8, OpNode>; -} - // All operand sizes distinguished in the encoding. multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm, SDPatternOperator OpNode> { @@ -5221,6 +5181,51 @@ multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size, V128:$LHS, V128:$MHS, V128:$RHS)>; } +// ARMv8.2-A Dot Product Instructions (Vector): These instructions extract +// bytes from S-sized elements. +class BaseSIMDThreeSameVectorDot<bit Q, bit U, string asm, string kind1, + string kind2, RegisterOperand RegType, + ValueType AccumType, ValueType InputType, + SDPatternOperator OpNode> : + BaseSIMDThreeSameVectorTied<Q, U, 0b100, 0b10010, RegType, asm, kind1, + [(set (AccumType RegType:$dst), + (OpNode (AccumType RegType:$Rd), + (InputType RegType:$Rn), + (InputType RegType:$Rm)))]> { + let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); +} + +multiclass SIMDThreeSameVectorDot<bit U, string asm, SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVectorDot<0, U, asm, ".2s", ".8b", V64, + v2i32, v8i8, OpNode>; + def v16i8 : BaseSIMDThreeSameVectorDot<1, U, asm, ".4s", ".16b", V128, + v4i32, v16i8, OpNode>; +} + +// ARMv8.2-A Fused Multiply Add-Long Instructions (Vector): These instructions +// select inputs from 4H vectors and accumulate outputs to a 2S vector (or from +// 8H to 4S, when Q=1). +class BaseSIMDThreeSameVectorFML<bit Q, bit U, bit b13, bits<3> size, string asm, string kind1, + string kind2, RegisterOperand RegType, + ValueType AccumType, ValueType InputType, + SDPatternOperator OpNode> : + BaseSIMDThreeSameVectorTied<Q, U, size, 0b11101, RegType, asm, kind1, + [(set (AccumType RegType:$dst), + (OpNode (AccumType RegType:$Rd), + (InputType RegType:$Rn), + (InputType RegType:$Rm)))]> { + let AsmString = !strconcat(asm, "{\t$Rd" # kind1 # ", $Rn" # kind2 # ", $Rm" # kind2 # "}"); + let Inst{13} = b13; +} + +multiclass SIMDThreeSameVectorFML<bit U, bit b13, bits<3> size, string asm, + SDPatternOperator OpNode> { + def v4f16 : BaseSIMDThreeSameVectorFML<0, U, b13, size, asm, ".2s", ".2h", V64, + v2f32, v4f16, OpNode>; + def v8f16 : BaseSIMDThreeSameVectorFML<1, U, b13, size, asm, ".4s", ".4h", V128, + v4f32, v8f16, OpNode>; +} + //---------------------------------------------------------------------------- // AdvSIMD two register vector instructions. @@ -7427,7 +7432,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc, let Inst{4-0} = Rd; } -// ARMv8.2 Index Dot product instructions +// ARMv8.2-A Dot Product Instructions (Indexed) class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind, string lhs_kind, string rhs_kind, RegisterOperand RegType, @@ -7446,12 +7451,20 @@ class BaseSIMDThreeSameVectorDotIndex<bit Q, bit U, string asm, string dst_kind, let Inst{11} = idx{1}; // H } -// ARMv8.2 Fused Multiply Add Long Instructions (Indexed) -class BaseSIMDThreeSameMultIndex<bit Q, bit U, bits<4> opc, string asm, - string dst_kind, string lhs_kind, - string rhs_kind, RegisterOperand RegType, - ValueType AccumType, ValueType InputType, - SDPatternOperator OpNode> : +multiclass SIMDThreeSameVectorDotIndex<bit U, string asm, + SDPatternOperator OpNode> { + def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", + V64, v2i32, v8i8, OpNode>; + def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b", + V128, v4i32, v16i8, OpNode>; +} + +// ARMv8.2-A Fused Multiply Add-Long Instructions (Indexed) +class BaseSIMDThreeSameVectorFMLIndex<bit Q, bit U, bits<4> opc, string asm, + string dst_kind, string lhs_kind, + string rhs_kind, RegisterOperand RegType, + ValueType AccumType, ValueType InputType, + SDPatternOperator OpNode> : BaseSIMDIndexedTied<Q, U, 0, 0b10, opc, RegType, RegType, V128, VectorIndexH, asm, "", dst_kind, lhs_kind, rhs_kind, [(set (AccumType RegType:$dst), @@ -7466,19 +7479,12 @@ class BaseSIMDThreeSameMultIndex<bit Q, bit U, bits<4> opc, string asm, let Inst{20} = idx{0}; // M } -multiclass SIMDThreeSameVectorDotIndex<bit U, string asm, +multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorDotIndex<0, U, asm, ".2s", ".8b", ".4b", V64, - v2i32, v8i8, OpNode>; - def v16i8 : BaseSIMDThreeSameVectorDotIndex<1, U, asm, ".4s", ".16b", ".4b", V128, - v4i32, v16i8, OpNode>; -} - -multiclass SIMDThreeSameMultIndex<bit U, bits<4> opc, string asm, SDPatternOperator OpNode> { - def v4f16 : BaseSIMDThreeSameMultIndex<0, U, opc, asm, ".2s", ".2h", ".h", V64, - v2f32, v4f16, OpNode>; - def v8f16 : BaseSIMDThreeSameMultIndex<1, U, opc, asm, ".4s", ".4h", ".h", V128, - v4f32, v8f16, OpNode>; + def v4f16 : BaseSIMDThreeSameVectorFMLIndex<0, U, opc, asm, ".2s", ".2h", ".h", + V64, v2f32, v4f16, OpNode>; + def v8f16 : BaseSIMDThreeSameVectorFMLIndex<1, U, opc, asm, ".4s", ".4h", ".h", + V128, v4f32, v8f16, OpNode>; } multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 77461eccf3e..2dc5991d708 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -514,7 +514,7 @@ def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> { } } -// ARMv8.2 Dot Product +// ARMv8.2-A Dot Product let Predicates = [HasDotProd] in { defm SDOT : SIMDThreeSameVectorDot<0, "sdot", int_aarch64_neon_sdot>; defm UDOT : SIMDThreeSameVectorDot<1, "udot", int_aarch64_neon_udot>; @@ -522,6 +522,18 @@ defm SDOTlane : SIMDThreeSameVectorDotIndex<0, "sdot", int_aarch64_neon_sdot>; defm UDOTlane : SIMDThreeSameVectorDotIndex<1, "udot", int_aarch64_neon_udot>; } +// ARMv8.2-A FP16 Fused Multiply-Add Long +let Predicates = [HasNEON, HasFP16FML] in { +defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>; +defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>; +defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>; +defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>; +defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>; +defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>; +defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>; +defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>; +} + // Armv8.2-A Crypto extensions let Predicates = [HasSHA3] in { def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">; @@ -3484,18 +3496,6 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah", defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh", int_aarch64_neon_sqsub>; -// FP16FML -let Predicates = [HasNEON, HasFP16FML] in { -defm FMLAL : SIMDThreeSameMult<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>; -defm FMLSL : SIMDThreeSameMult<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>; -defm FMLAL2 : SIMDThreeSameMult<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>; -defm FMLSL2 : SIMDThreeSameMult<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>; -defm FMLALlane : SIMDThreeSameMultIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>; -defm FMLSLlane : SIMDThreeSameMultIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>; -defm FMLAL2lane : SIMDThreeSameMultIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>; -defm FMLSL2lane : SIMDThreeSameMultIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>; -} - defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>; defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic", BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >; |