diff options
| author | Cullen Rhodes <cullen.rhodes@arm.com> | 2019-12-04 16:33:49 +0000 |
|---|---|---|
| committer | Cullen Rhodes <cullen.rhodes@arm.com> | 2019-12-19 13:18:40 +0000 |
| commit | 23c28c40436143006be740533375c036d11c92cd (patch) | |
| tree | f57e5c3be015cf34a620d6b7424de6f017dd5598 /llvm/lib | |
| parent | 60cb33c9b8543dee1630410c13679088d0569d03 (diff) | |
| download | bcm5719-llvm-23c28c40436143006be740533375c036d11c92cd.tar.gz bcm5719-llvm-23c28c40436143006be740533375c036d11c92cd.zip | |
[AArch64][SVE] Add permutation and selection intrinsics
Summary:
Adds the following intrinsics:
* @llvm.aarch64.sve.clasta
* @llvm.aarch64.sve.clasta_n
* @llvm.aarch64.sve.clastb
* @llvm.aarch64.sve.clastb_n
* @llvm.aarch64.sve.compact
* @llvm.aarch64.sve.ext
* @llvm.aarch64.sve.lasta
* @llvm.aarch64.sve.lastb
* @llvm.aarch64.sve.rev
* @llvm.aarch64.sve.splice
* @llvm.aarch64.sve.tbl
* @llvm.aarch64.sve.trn1
* @llvm.aarch64.sve.trn2
* @llvm.aarch64.sve.uzp1
* @llvm.aarch64.sve.uzp2
* @llvm.aarch64.sve.zip1
* @llvm.aarch64.sve.zip2
Reviewers: sdesmalen, efriedma, dancgr, mgudim, huntergr, rengolin
Reviewed By: sdesmalen, efriedma
Subscribers: kmclaughlin, tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71401
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 111 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 69 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/SVEInstrFormats.td | 113 |
5 files changed, 264 insertions, 42 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 85bbb3fc43c..87da1bb51b6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1292,6 +1292,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED"; case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED"; case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED"; + case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N"; + case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N"; + case AArch64ISD::LASTA: return "AArch64ISD::LASTA"; + case AArch64ISD::LASTB: return "AArch64ISD::LASTB"; + case AArch64ISD::REV: return "AArch64ISD::REV"; + case AArch64ISD::TBL: return "AArch64ISD::TBL"; case AArch64ISD::NOT: return "AArch64ISD::NOT"; case AArch64ISD::BIT: return "AArch64ISD::BIT"; case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; @@ -2922,6 +2928,42 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::aarch64_sve_uunpklo: return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(), Op.getOperand(1)); + case Intrinsic::aarch64_sve_clasta_n: + return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::aarch64_sve_clastb_n: + return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case Intrinsic::aarch64_sve_lasta: + return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_lastb: + return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_rev: + return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(), + Op.getOperand(1)); + case Intrinsic::aarch64_sve_tbl: + return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_trn1: + return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_trn2: + return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_uzp1: + return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_uzp2: + return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_zip1: + return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_sve_zip2: + return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::aarch64_sve_ptrue: return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(), Op.getOperand(1)); @@ -10684,6 +10726,31 @@ static SDValue LowerSVEIntReduction(SDNode *N, unsigned Opc, return SDValue(); } +static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) { + SDLoc dl(N); + LLVMContext &Ctx = *DAG.getContext(); + EVT VT = N->getValueType(0); + + assert(VT.isScalableVector() && "Expected a scalable vector."); + + // Current lowering only supports the SVE-ACLE types. + if (VT.getSizeInBits() != AArch64::SVEBitsPerBlock) + return SDValue(); + + unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8; + unsigned ByteSize = VT.getSizeInBits() / 8; + EVT ByteVT = EVT::getVectorVT(Ctx, MVT::i8, { ByteSize, true }); + + // Convert everything to the domain of EXT (i.e bytes). + SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1)); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2)); + SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3), + DAG.getConstant(ElemSize, dl, MVT::i32)); + + SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2); + return DAG.getNode(ISD::BITCAST, dl, VT, EXT); +} + static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID, bool Invert, TargetLowering::DAGCombinerInfo &DCI, @@ -10823,6 +10890,8 @@ static SDValue performIntrinsicCombine(SDNode *N, return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG); case Intrinsic::aarch64_sve_andv: return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG); + case Intrinsic::aarch64_sve_ext: + return LowerSVEIntrinsicEXT(N, DAG); case Intrinsic::aarch64_sve_cmpeq_wide: return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq, false, DCI, DAG); @@ -12734,6 +12803,48 @@ void AArch64TargetLowering::ReplaceNodeResults( Results.append({Pair, Result.getValue(2) /* Chain */}); return; } + case ISD::INTRINSIC_WO_CHAIN: { + EVT VT = N->getValueType(0); + assert((VT == MVT::i8 || VT == MVT::i16) && + "custom lowering for unexpected type"); + + ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0)); + Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); + switch (IntID) { + default: + return; + case Intrinsic::aarch64_sve_clasta_n: { + SDLoc DL(N); + auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2)); + auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32, + N->getOperand(1), Op2, N->getOperand(3)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); + return; + } + case Intrinsic::aarch64_sve_clastb_n: { + SDLoc DL(N); + auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2)); + auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32, + N->getOperand(1), Op2, N->getOperand(3)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); + return; + } + case Intrinsic::aarch64_sve_lasta: { + SDLoc DL(N); + auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32, + N->getOperand(1), N->getOperand(2)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); + return; + } + case Intrinsic::aarch64_sve_lastb: { + SDLoc DL(N); + auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32, + N->getOperand(1), N->getOperand(2)); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V)); + return; + } + } + } } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 1ee134c5bfc..3a986e96e26 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -204,6 +204,13 @@ enum NodeType : unsigned { UUNPKHI, UUNPKLO, + CLASTA_N, + CLASTB_N, + LASTA, + LASTB, + REV, + TBL, + INSR, PTRUE, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 861c0c0a18a..f4d340c9f06 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -263,6 +263,10 @@ def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisSameAs<1, 4>]>; +def SDT_AArch64TBL : SDTypeProfile<1, 2, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2> +]>; + // non-extending masked load fragment. def nonext_masked_load : PatFrag<(ops node:$ptr, node:$pred, node:$def), @@ -541,6 +545,8 @@ def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>; def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 92bc59a3b72..f41664b4c2c 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -63,6 +63,15 @@ def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>; def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>; def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>; +def AArch64lasta : SDNode<"AArch64ISD::LASTA", SDT_AArch64Reduce>; +def AArch64lastb : SDNode<"AArch64ISD::LASTB", SDT_AArch64Reduce>; + +def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>; +def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>; +def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>; + +def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>; let Predicates = [HasSVE] in { @@ -251,19 +260,19 @@ let Predicates = [HasSVE] in { // Select elements from either vector (predicated) defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>; - defm SPLICE_ZPZ : sve_int_perm_splice<"splice">; - defm COMPACT_ZPZ : sve_int_perm_compact<"compact">; + defm SPLICE_ZPZ : sve_int_perm_splice<"splice", int_aarch64_sve_splice>; + defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>; defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>; - def EXT_ZZI : sve_int_perm_extract_i<"ext">; + defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>; defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>; defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>; defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>; defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>; - defm REV_PP : sve_int_perm_reverse_p<"rev">; - defm REV_ZZ : sve_int_perm_reverse_z<"rev">; + defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>; + defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>; defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>; defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>; @@ -314,17 +323,17 @@ let Predicates = [HasSVE] in { defm NORS_PPzPP : sve_int_pred_log<0b1110, "nors", int_aarch64_sve_nors>; defm NANDS_PPzPP : sve_int_pred_log<0b1111, "nands", int_aarch64_sve_nands>; - defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta">; - defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb">; - defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta">; - defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb">; - defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta">; - defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb">; + defm CLASTA_RPZ : sve_int_perm_clast_rz<0, "clasta", AArch64clasta_n>; + defm CLASTB_RPZ : sve_int_perm_clast_rz<1, "clastb", AArch64clastb_n>; + defm CLASTA_VPZ : sve_int_perm_clast_vz<0, "clasta", AArch64clasta_n>; + defm CLASTB_VPZ : sve_int_perm_clast_vz<1, "clastb", AArch64clastb_n>; + defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>; + defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>; - defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta">; - defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb">; - defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta">; - defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb">; + defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>; + defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>; + defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>; + defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>; // continuous load with reg+immediate defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>; @@ -749,21 +758,21 @@ let Predicates = [HasSVE] in { defm ADR_LSL_ZZZ_S : sve_int_bin_cons_misc_0_a_32_lsl<0b10, "adr">; defm ADR_LSL_ZZZ_D : sve_int_bin_cons_misc_0_a_64_lsl<0b11, "adr">; - defm TBL_ZZZ : sve_int_perm_tbl<"tbl">; - - defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1">; - defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2">; - defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1">; - defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2">; - defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1">; - defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2">; - - defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1">; - defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2">; - defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1">; - defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2">; - defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1">; - defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2">; + defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>; + + defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>; + defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2", AArch64zip2>; + defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1", AArch64uzp1>; + defm UZP2_ZZZ : sve_int_perm_bin_perm_zz<0b011, "uzp2", AArch64uzp2>; + defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>; + defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>; + + defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>; + defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>; + defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>; + defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2>; + defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>; + defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>; defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", int_aarch64_sve_cmphs, SETUGE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", int_aarch64_sve_cmphi, SETUGT>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 63214215add..a643c65152f 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -821,7 +821,7 @@ class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, let Inst{4-0} = Zd; } -multiclass sve_int_perm_tbl<string asm> { +multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> { def _B : sve_int_perm_tbl<0b00, 0b10, asm, ZPR8, Z_b>; def _H : sve_int_perm_tbl<0b01, 0b10, asm, ZPR16, Z_h>; def _S : sve_int_perm_tbl<0b10, 0b10, asm, ZPR32, Z_s>; @@ -835,6 +835,15 @@ multiclass sve_int_perm_tbl<string asm> { (!cast<Instruction>(NAME # _S) ZPR32:$Zd, ZPR32:$Zn, ZPR32:$Zm), 0>; def : InstAlias<asm # "\t$Zd, $Zn, $Zm", (!cast<Instruction>(NAME # _D) ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zm), 0>; + + def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; + + def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>; } multiclass sve2_int_perm_tbl<string asm> { @@ -884,11 +893,20 @@ class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty> let Inst{4-0} = Zd; } -multiclass sve_int_perm_reverse_z<string asm> { +multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> { def _B : sve_int_perm_reverse_z<0b00, asm, ZPR8>; def _H : sve_int_perm_reverse_z<0b01, asm, ZPR16>; def _S : sve_int_perm_reverse_z<0b10, asm, ZPR32>; def _D : sve_int_perm_reverse_z<0b11, asm, ZPR64>; + + def : SVE_1_Op_Pat<nxv16i8, op, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_1_Op_Pat<nxv8i16, op, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Pat<nxv4i32, op, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Pat<nxv2i64, op, nxv2i64, !cast<Instruction>(NAME # _D)>; + + def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty> @@ -906,11 +924,16 @@ class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty> let Inst{3-0} = Pd; } -multiclass sve_int_perm_reverse_p<string asm> { +multiclass sve_int_perm_reverse_p<string asm, SDPatternOperator op> { def _B : sve_int_perm_reverse_p<0b00, asm, PPR8>; def _H : sve_int_perm_reverse_p<0b01, asm, PPR16>; def _S : sve_int_perm_reverse_p<0b10, asm, PPR32>; def _D : sve_int_perm_reverse_p<0b11, asm, PPR64>; + + def : SVE_1_Op_Pat<nxv16i1, op, nxv16i1, !cast<Instruction>(NAME # _B)>; + def : SVE_1_Op_Pat<nxv8i1, op, nxv8i1, !cast<Instruction>(NAME # _H)>; + def : SVE_1_Op_Pat<nxv4i1, op, nxv4i1, !cast<Instruction>(NAME # _S)>; + def : SVE_1_Op_Pat<nxv2i1, op, nxv2i1, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm, @@ -1021,6 +1044,13 @@ class sve_int_perm_extract_i<string asm> let ElementSize = ElementSizeNone; } +multiclass sve_int_perm_extract_i<string asm, SDPatternOperator op> { + def NAME : sve_int_perm_extract_i<asm>; + + def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, imm0_255, + !cast<Instruction>(NAME)>; +} + class sve2_int_perm_extract_i_cons<string asm> : I<(outs ZPR8:$Zd), (ins ZZ_b:$Zn, imm0_255:$imm8), asm, "\t$Zd, $Zn, $imm8", @@ -1918,11 +1948,22 @@ class sve_int_perm_bin_perm_zz<bits<3> opc, bits<2> sz8_64, string asm, let Inst{4-0} = Zd; } -multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm> { +multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm, + SDPatternOperator op> { def _B : sve_int_perm_bin_perm_zz<opc, 0b00, asm, ZPR8>; def _H : sve_int_perm_bin_perm_zz<opc, 0b01, asm, ZPR16>; def _S : sve_int_perm_bin_perm_zz<opc, 0b10, asm, ZPR32>; def _D : sve_int_perm_bin_perm_zz<opc, 0b11, asm, ZPR64>; + + def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; + + def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4f16, op, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; } //===----------------------------------------------------------------------===// @@ -4862,11 +4903,17 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm, let Inst{3-0} = Pd; } -multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm> { +multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm, + SDPatternOperator op> { def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8>; def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16>; def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32>; def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64>; + + def : SVE_2_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_punpk<bit opc, string asm> @@ -4972,11 +5019,16 @@ class sve_int_perm_clast_rz<bits<2> sz8_64, bit ab, string asm, let Constraints = "$Rdn = $_Rdn"; } -multiclass sve_int_perm_clast_rz<bit ab, string asm> { +multiclass sve_int_perm_clast_rz<bit ab, string asm, SDPatternOperator op> { def _B : sve_int_perm_clast_rz<0b00, ab, asm, ZPR8, GPR32>; def _H : sve_int_perm_clast_rz<0b01, ab, asm, ZPR16, GPR32>; def _S : sve_int_perm_clast_rz<0b10, ab, asm, ZPR32, GPR32>; def _D : sve_int_perm_clast_rz<0b11, ab, asm, ZPR64, GPR64>; + + def : SVE_3_Op_Pat<i32, op, nxv16i1, i32, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Pat<i32, op, nxv8i1, i32, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<i32, op, nxv4i1, i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<i64, op, nxv2i1, i64, nxv2i64, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm, @@ -5000,11 +5052,15 @@ class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm, let Constraints = "$Vdn = $_Vdn"; } -multiclass sve_int_perm_clast_vz<bit ab, string asm> { +multiclass sve_int_perm_clast_vz<bit ab, string asm, SDPatternOperator op> { def _B : sve_int_perm_clast_vz<0b00, ab, asm, ZPR8, FPR8>; def _H : sve_int_perm_clast_vz<0b01, ab, asm, ZPR16, FPR16>; def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>; def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>; + + def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm, @@ -5030,11 +5086,20 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm, let ElementSize = ElementSizeNone; } -multiclass sve_int_perm_clast_zz<bit ab, string asm> { +multiclass sve_int_perm_clast_zz<bit ab, string asm, SDPatternOperator op> { def _B : sve_int_perm_clast_zz<0b00, ab, asm, ZPR8>; def _H : sve_int_perm_clast_zz<0b01, ab, asm, ZPR16>; def _S : sve_int_perm_clast_zz<0b10, ab, asm, ZPR32>; def _D : sve_int_perm_clast_zz<0b11, ab, asm, ZPR64>; + + def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; + + def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm, @@ -5056,11 +5121,16 @@ class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm, let Inst{4-0} = Rd; } -multiclass sve_int_perm_last_r<bit ab, string asm> { +multiclass sve_int_perm_last_r<bit ab, string asm, SDPatternOperator op> { def _B : sve_int_perm_last_r<0b00, ab, asm, ZPR8, GPR32>; def _H : sve_int_perm_last_r<0b01, ab, asm, ZPR16, GPR32>; def _S : sve_int_perm_last_r<0b10, ab, asm, ZPR32, GPR32>; def _D : sve_int_perm_last_r<0b11, ab, asm, ZPR64, GPR64>; + + def : SVE_2_Op_Pat<i32, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<i32, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm, @@ -5082,11 +5152,16 @@ class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm, let Inst{4-0} = Vd; } -multiclass sve_int_perm_last_v<bit ab, string asm> { +multiclass sve_int_perm_last_v<bit ab, string asm, SDPatternOperator op> { def _B : sve_int_perm_last_v<0b00, ab, asm, ZPR8, FPR8>; def _H : sve_int_perm_last_v<0b01, ab, asm, ZPR16, FPR16>; def _S : sve_int_perm_last_v<0b10, ab, asm, ZPR32, FPR32>; def _D : sve_int_perm_last_v<0b11, ab, asm, ZPR64, FPR64>; + + def : SVE_2_Op_Pat<f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; } class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty> @@ -5109,11 +5184,20 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty> let ElementSize = ElementSizeNone; } -multiclass sve_int_perm_splice<string asm> { +multiclass sve_int_perm_splice<string asm, SDPatternOperator op> { def _B : sve_int_perm_splice<0b00, asm, ZPR8>; def _H : sve_int_perm_splice<0b01, asm, ZPR16>; def _S : sve_int_perm_splice<0b10, asm, ZPR32>; def _D : sve_int_perm_splice<0b11, asm, ZPR64>; + + def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>; + + def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>; + def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>; } class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm, @@ -5295,9 +5379,14 @@ class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty> let Inst{4-0} = Zd; } -multiclass sve_int_perm_compact<string asm> { +multiclass sve_int_perm_compact<string asm, SDPatternOperator op> { def _S : sve_int_perm_compact<0b0, asm, ZPR32>; def _D : sve_int_perm_compact<0b1, asm, ZPR64>; + + def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>; } |

