diff options
| author | Danilo Carvalho Grael <danilo.carvalho.grael@huawei.com> | 2019-12-05 08:07:02 -0500 |
|---|---|---|
| committer | amehsan <e00408328@ptlaby04.huawei.com> | 2019-12-05 09:59:19 -0500 |
| commit | b29916cec3f45e5fb5efff5104acf142f348c724 (patch) | |
| tree | 39b3c9cd137523ddbfd86dc8d9ef1620ed4af2d3 /llvm/lib | |
| parent | 01e5290b49bd2a8753c685bb5f02596989a31754 (diff) | |
| download | bcm5719-llvm-b29916cec3f45e5fb5efff5104acf142f348c724.tar.gz bcm5719-llvm-b29916cec3f45e5fb5efff5104acf142f348c724.zip | |
[AArch64][SVE] Integer reduction instructions pattern/intrinsics.
Added pattern matching and intrinsics for the following SVE integer reduction instructions:
-- saddv, uaddv
-- smaxv, sminv, umaxv, uminv
-- orv, eorv, andv
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 49 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.h | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 28 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/SVEInstrFormats.td | 33 | ||||
| -rw-r--r-- | llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h | 2 |
5 files changed, 106 insertions, 14 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index db00f81e53e..f32f0374122 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1281,6 +1281,13 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::UMINV: return "AArch64ISD::UMINV"; case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV"; case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV"; + case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED"; + case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED"; + case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED"; + case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED"; + case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED"; + case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED"; + case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED"; case AArch64ISD::NOT: return "AArch64ISD::NOT"; case AArch64ISD::BIT: return "AArch64ISD::BIT"; case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; @@ -10520,6 +10527,34 @@ static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, DAG.getConstant(0, dl, MVT::i64)); } +static SDValue LowerSVEIntReduction(SDNode *N, unsigned Opc, + SelectionDAG &DAG) { + SDLoc dl(N); + LLVMContext &Ctx = *DAG.getContext(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + EVT VT = N->getValueType(0); + SDValue Pred = N->getOperand(1); + SDValue Data = N->getOperand(2); + EVT DataVT = Data.getValueType(); + + if (DataVT.getVectorElementType().isScalarInteger() && + (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)) { + if (!TLI.isTypeLegal(DataVT)) + return SDValue(); + + EVT OutputVT = EVT::getVectorVT(Ctx, VT, + AArch64::NeonBitsPerVector / VT.getSizeInBits()); + SDValue Reduce = DAG.getNode(Opc, dl, OutputVT, Pred, Data); + SDValue Zero = DAG.getConstant(0, dl, MVT::i64); + SDValue Result = 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Reduce, Zero); + + return Result; + } + + return SDValue(); +} + static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -10574,6 +10609,20 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_crc32h: case Intrinsic::aarch64_crc32ch: return tryCombineCRC32(0xffff, N, DAG); + case Intrinsic::aarch64_sve_smaxv: + return LowerSVEIntReduction(N, AArch64ISD::SMAXV_PRED, DAG); + case Intrinsic::aarch64_sve_umaxv: + return LowerSVEIntReduction(N, AArch64ISD::UMAXV_PRED, DAG); + case Intrinsic::aarch64_sve_sminv: + return LowerSVEIntReduction(N, AArch64ISD::SMINV_PRED, DAG); + case Intrinsic::aarch64_sve_uminv: + return LowerSVEIntReduction(N, AArch64ISD::UMINV_PRED, DAG); + case Intrinsic::aarch64_sve_orv: + return LowerSVEIntReduction(N, AArch64ISD::ORV_PRED, DAG); + case Intrinsic::aarch64_sve_eorv: + return LowerSVEIntReduction(N, AArch64ISD::EORV_PRED, DAG); + case Intrinsic::aarch64_sve_andv: + return LowerSVEIntReduction(N, AArch64ISD::ANDV_PRED, DAG); } return SDValue(); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 118ab7f3d25..d54ac6510ca 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -155,6 +155,14 @@ enum NodeType : unsigned { SMAXV, UMAXV, + SMAXV_PRED, + UMAXV_PRED, + SMINV_PRED, + UMINV_PRED, + ORV_PRED, + EORV_PRED, + ANDV_PRED, + // Vector bitwise negation NOT, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c75208e4aac..0fb74f04984 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -28,6 +28,16 @@ def AArch64ld1_gather_uxtw_scaled : SDNode<"AArch64ISD::GLD1_UXTW_SCALED", def AArch64ld1_gather_sxtw_scaled : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", 
SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; def AArch64ld1_gather_imm : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>; +def SDT_AArch64Reduce : SDTypeProfile<1, 2, [SDTCisVec<1>, SDTCisVec<2>]>; + +def AArch64smaxv_pred : SDNode<"AArch64ISD::SMAXV_PRED", SDT_AArch64Reduce>; +def AArch64umaxv_pred : SDNode<"AArch64ISD::UMAXV_PRED", SDT_AArch64Reduce>; +def AArch64sminv_pred : SDNode<"AArch64ISD::SMINV_PRED", SDT_AArch64Reduce>; +def AArch64uminv_pred : SDNode<"AArch64ISD::UMINV_PRED", SDT_AArch64Reduce>; +def AArch64orv_pred : SDNode<"AArch64ISD::ORV_PRED", SDT_AArch64Reduce>; +def AArch64eorv_pred : SDNode<"AArch64ISD::EORV_PRED", SDT_AArch64Reduce>; +def AArch64andv_pred : SDNode<"AArch64ISD::ANDV_PRED", SDT_AArch64Reduce>; + let Predicates = [HasSVE] in { def RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr">; @@ -71,15 +81,15 @@ let Predicates = [HasSVE] in { defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls>; // SVE predicated integer reductions. 
- defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">; - defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv">; - defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv">; - defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv">; - defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv">; - defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv">; - defm ORV_VPZ : sve_int_reduce_2<0b000, "orv">; - defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv">; - defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv">; + defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>; + defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv, int_aarch64_sve_saddv>; + defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", AArch64smaxv_pred>; + defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", AArch64umaxv_pred>; + defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", AArch64sminv_pred>; + defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", AArch64uminv_pred>; + defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", AArch64orv_pred>; + defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", AArch64eorv_pred>; + defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", AArch64andv_pred>; defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn">; defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 96a0117c955..2581f611df2 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -293,6 +293,11 @@ class SVE_2_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, : Pat<(vtd (op vt1:$Op1, vt2:$Op2)), (inst $Op1, $Op2)>; +class SVE_2_Op_Pat_Reduce_To_Neon<ValueType vtd, SDPatternOperator op, ValueType vt1, + ValueType vt2, Instruction inst, SubRegIndex sub> +: Pat<(vtd (op vt1:$Op1, vt2:$Op2)), + (INSERT_SUBREG (vtd (IMPLICIT_DEF)), (inst $Op1, $Op2), sub)>; + class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, ValueType vt2, ValueType vt3, 
Instruction inst> : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), @@ -5967,31 +5972,51 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm, let Inst{4-0} = Vd; } -multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm> { +multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, SDPatternOperator op> { def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>; def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>; def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>; + + def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; } -multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm> { +multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, SDPatternOperator op, SDPatternOperator opSaddv> { def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>; def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>; def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>; def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>; + + def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>; + def : SVE_2_Op_Pat<i64, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>; + def : SVE_2_Op_Pat<i64, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>; + def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; + def : SVE_2_Op_Pat<i64, opSaddv, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>; } -multiclass sve_int_reduce_1<bits<3> opc, string asm> { +multiclass sve_int_reduce_1<bits<3> opc, string asm, SDPatternOperator op> { def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>; def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>; def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>; def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>; + + def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, 
op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>; + def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>; + def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>; + def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>; } -multiclass sve_int_reduce_2<bits<3> opc, string asm> { +multiclass sve_int_reduce_2<bits<3> opc, string asm, SDPatternOperator op> { def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>; def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>; def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>; def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>; + + def : SVE_2_Op_Pat_Reduce_To_Neon<v16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B), bsub>; + def : SVE_2_Op_Pat_Reduce_To_Neon<v8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H), hsub>; + def : SVE_2_Op_Pat_Reduce_To_Neon<v4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S), ssub>; + def : SVE_2_Op_Pat_Reduce_To_Neon<v2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D), dsub>; } class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm, diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 57c126fe649..e95cbae8786 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -652,8 +652,8 @@ namespace AArch64 { // in index i*P of a <n x (M*P) x t> vector. The other elements of the // <n x (M*P) x t> vector (such as index 1) are undefined. static constexpr unsigned SVEBitsPerBlock = 128; +const unsigned NeonBitsPerVector = 128; } // end namespace AArch64 - } // end namespace llvm #endif |

