| author | Mikhail Maltsev <mikhail.maltsev@arm.com> | 2019-12-13 13:17:29 +0000 |
|---|---|---|
| committer | Mikhail Maltsev <mikhail.maltsev@arm.com> | 2019-12-13 13:17:29 +0000 |
| commit | 99581fd4c8e12f5eca38e7cfc5992508a9bfe383 (patch) | |
| tree | ef38e7222ec01a704bf419825756cab9bd7120b5 /llvm/lib | |
| parent | 1cc4b603ba79c8bf8f60790cff31f872e7d00142 (diff) | |
[ARM][MVE] Add vector reduction intrinsics with two vector operands
Summary:
This patch adds intrinsics for the following MVE instructions:
* VABAV
* VMLADAV, VMLSDAV
* VMLALDAV, VMLSLDAV
* VRMLALDAVH, VRMLSLDAVH
Each of the above 4 groups has a corresponding new LLVM IR intrinsic,
since the instructions cannot be easily represented using
general-purpose IR operations.
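For context, a minimal sketch (not part of this patch) of how these reductions are reached from C/C++ via the ACLE intrinsics in `<arm_mve.h>`; the clang-side builtins are handled in the companion cfe change, and the intrinsic names below are taken from the ACLE MVE specification rather than from this diff.

```cpp
// Hedged example: build for an MVE target, e.g. -mcpu=cortex-m55 -O2.
#include <arm_mve.h>
#include <stdint.h>

// VABAV: accumulate absolute differences of the lanes into a scalar.
uint32_t sad_s8(uint32_t acc, int8x16_t a, int8x16_t b) {
  return vabavq_s8(acc, a, b);
}

// VMLALDAVA: multiply corresponding 16-bit lanes and accumulate the
// products into a 64-bit scalar.
int64_t dot_s16(int64_t acc, int16x8_t a, int16x8_t b) {
  return vmlaldavaq_s16(acc, a, b);
}

// VRMLALDAVHA: rounding variant on 32-bit lanes; the instruction keeps the
// high 64 bits of the internal 72-bit accumulation.
int64_t rdot_s32(int64_t acc, int32x4_t a, int32x4_t b) {
  return vrmlaldavhaq_s32(acc, a, b);
}
```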
Reviewers: simon_tatham, ostannard, dmgreen, MarkMurrayARM
Reviewed By: MarkMurrayARM
Subscribers: merge_guards_bot, kristof.beyls, hiraditya, cfe-commits, llvm-commits
Tags: #clang, #llvm
Differential Revision: https://reviews.llvm.org/D71062
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 147 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 144 |
2 files changed, 251 insertions, 40 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6dd56b35d0a..acbbf20f3ef 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -237,6 +237,27 @@ private:
   void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                          uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
 
+  /// Select long MVE vector reductions with two vector operands
+  /// Stride is the number of vector element widths the instruction can operate
+  /// on:
+  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
+  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
+  /// Stride is used when addressing the OpcodesS array which contains multiple
+  /// opcodes for each element width.
+  /// TySize is the index into the list of element types listed above
+  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
+                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
+                             size_t Stride, size_t TySize);
+
+  /// Select a 64-bit MVE vector reduction with two vector operands
+  /// arm_mve_vmlldava_[predicated]
+  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
+                         const uint16_t *OpcodesU);
+  /// Select a 72-bit MVE vector rounding reduction with two vector operands
+  /// int_arm_mve_vrmlldavha[_predicated]
+  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
+                           const uint16_t *OpcodesU);
+
   /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
   /// should be 2 or 4. The opcode array specifies the instructions
   /// used for 8, 16 and 32-bit lane sizes respectively, and each
@@ -2531,6 +2552,96 @@ void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
 }
 
+static bool SDValueToConstBool(SDValue SDVal) {
+  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
+  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
+  uint64_t Value = SDValConstant->getZExtValue();
+  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
+  return Value;
+}
+
+void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
+                                            const uint16_t *OpcodesS,
+                                            const uint16_t *OpcodesU,
+                                            size_t Stride, size_t TySize) {
+  assert(TySize < Stride && "Invalid TySize");
+  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
+  bool IsSub = SDValueToConstBool(N->getOperand(2));
+  bool IsExchange = SDValueToConstBool(N->getOperand(3));
+  if (IsUnsigned) {
+    assert(!IsSub &&
+           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
+    assert(!IsExchange &&
+           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
+  }
+
+  auto OpIsZero = [N](size_t OpNo) {
+    if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
+      if (OpConst->getZExtValue() == 0)
+        return true;
+    return false;
+  };
+
+  // If the input accumulator value is not zero, select an instruction with
+  // accumulator, otherwise select an instruction without accumulator
+  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
+
+  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
+  if (IsSub)
+    Opcodes += 4 * Stride;
+  if (IsExchange)
+    Opcodes += 2 * Stride;
+  if (IsAccum)
+    Opcodes += Stride;
+  uint16_t Opcode = Opcodes[TySize];
+
+  SDLoc Loc(N);
+  SmallVector<SDValue, 8> Ops;
+  // Push the accumulator operands, if they are used
+  if (IsAccum) {
+    Ops.push_back(N->getOperand(4));
+    Ops.push_back(N->getOperand(5));
+  }
+  // Push the two vector operands
+  Ops.push_back(N->getOperand(6));
+  Ops.push_back(N->getOperand(7));
+
+  if (Predicated)
+    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
+  else
+    AddEmptyMVEPredicateToOps(Ops, Loc);
+
+  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
+void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
+                                        const uint16_t *OpcodesS,
+                                        const uint16_t *OpcodesU) {
+  EVT VecTy = N->getOperand(6).getValueType();
+  size_t SizeIndex;
+  switch (VecTy.getVectorElementType().getSizeInBits()) {
+  case 16:
+    SizeIndex = 0;
+    break;
+  case 32:
+    SizeIndex = 1;
+    break;
+  default:
+    llvm_unreachable("bad vector element size");
+  }
+
+  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
+}
+
+void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
+                                          const uint16_t *OpcodesS,
+                                          const uint16_t *OpcodesU) {
+  EVT VecTy = N->getOperand(6).getValueType();
+  assert(VecTy.getVectorElementType().getSizeInBits() == 32 &&
+         "bad vector element size");
+  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
+}
+
 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                     const uint16_t *const *Opcodes) {
   EVT VT = N->getValueType(0);
@@ -4376,6 +4487,42 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
     return;
 
+  case Intrinsic::arm_mve_vmlldava:
+  case Intrinsic::arm_mve_vmlldava_predicated: {
+    static const uint16_t OpcodesU[] = {
+        ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
+        ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
+    };
+    static const uint16_t OpcodesS[] = {
+        ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
+        ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
+        ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
+        ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
+        ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
+        ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
+        ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
+        ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
+    };
+    SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
+                      OpcodesS, OpcodesU);
+    return;
+  }
+
+  case Intrinsic::arm_mve_vrmlldavha:
+  case Intrinsic::arm_mve_vrmlldavha_predicated: {
+    static const uint16_t OpcodesU[] = {
+        ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
+    };
+    static const uint16_t OpcodesS[] = {
+        ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
+        ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
+        ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
+        ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
+    };
+    SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
+                        OpcodesS, OpcodesU);
+    return;
+  }
   }
   break;
 }
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 21f0d5e8679..a40231c4aa1 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -567,10 +567,10 @@ class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
   let Inst{4} = 0b0;
 }
 
-class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+class MVE_VABAV<string suffix, bit U, bits<2> size>
   : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
               NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
-              pattern> {
+              []> {
   bits<4> Qm;
   bits<4> Qn;
   bits<4> Rda;
@@ -589,12 +589,36 @@ class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
   let Inst{0} = 0b1;
 }
 
-def MVE_VABAVs8 : MVE_VABAV<"s8", 0b0, 0b00>;
-def MVE_VABAVs16 : MVE_VABAV<"s16", 0b0, 0b01>;
-def MVE_VABAVs32 : MVE_VABAV<"s32", 0b0, 0b10>;
-def MVE_VABAVu8 : MVE_VABAV<"u8", 0b1, 0b00>;
-def MVE_VABAVu16 : MVE_VABAV<"u16", 0b1, 0b01>;
-def MVE_VABAVu32 : MVE_VABAV<"u32", 0b1, 0b10>;
+multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
+  def "" : MVE_VABAV<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+
+  let Predicates = [HasMVEInt] in {
+    def : Pat<(i32 (int_arm_mve_vabav
+                        (i32 VTI.Unsigned),
+                        (i32 rGPR:$Rda_src),
+                        (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (i32 (!cast<Instruction>(NAME)
+                        (i32 rGPR:$Rda_src),
+                        (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+    def : Pat<(i32 (int_arm_mve_vabav_predicated
+                        (i32 VTI.Unsigned),
+                        (i32 rGPR:$Rda_src),
+                        (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                        (VTI.Pred VCCR:$mask))),
+              (i32 (!cast<Instruction>(NAME)
+                        (i32 rGPR:$Rda_src),
+                        (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                        ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+  }
+}
+
+defm MVE_VABAVs8 : MVE_VABAV_m<MVE_v16s8>;
+defm MVE_VABAVs16 : MVE_VABAV_m<MVE_v8s16>;
+defm MVE_VABAVs32 : MVE_VABAV_m<MVE_v4s32>;
+defm MVE_VABAVu8 : MVE_VABAV_m<MVE_v16u8>;
+defm MVE_VABAVu16 : MVE_VABAV_m<MVE_v8u16>;
+defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
 
 class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
                 bit A, bit U, bits<2> size, list<dag> pattern=[]>
@@ -803,10 +827,9 @@ defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
 defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
 
 class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
-                     bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
-                     list<dag> pattern=[]>
+                     bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
   : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
-              "$RdaDest, $Qn, $Qm", cstr, pattern> {
+              "$RdaDest, $Qn, $Qm", cstr, []> {
   bits<4> RdaDest;
   bits<3> Qm;
   bits<3> Qn;
@@ -824,47 +847,88 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
   let Inst{0} = bit_0;
 }
 
-multiclass MVE_VMLAMLSDAV_A<string iname, string x, string suffix,
-                            bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
-                            list<dag> pattern=[]> {
-  def ""#x#suffix : MVE_VMLAMLSDAV<iname # x, suffix,
+multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
+                            bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
+  def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
                                    (ins MQPR:$Qn, MQPR:$Qm), "",
-                                   sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
-  def "a"#x#suffix : MVE_VMLAMLSDAV<iname # "a" # x, suffix,
+                                   sz, bit_28, 0b0, X, bit_8, bit_0>;
+  def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
                                     (ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
                                     "$RdaDest = $RdaSrc",
-                                    sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
+                                    sz, bit_28, 0b1, X, bit_8, bit_0>;
+  let Predicates = [HasMVEInt] in {
+    def : Pat<(i32 (int_arm_mve_vmldava
+                            (i32 VTI.Unsigned),
+                            (i32 bit_0) /* subtract */,
+                            (i32 X) /* exchange */,
+                            (i32 0) /* accumulator */,
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+    def : Pat<(i32 (int_arm_mve_vmldava_predicated
+                            (i32 VTI.Unsigned),
+                            (i32 bit_0) /* subtract */,
+                            (i32 X) /* exchange */,
+                            (i32 0) /* accumulator */,
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            (VTI.Pred VCCR:$mask))),
+              (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+
+    def : Pat<(i32 (int_arm_mve_vmldava
+                            (i32 VTI.Unsigned),
+                            (i32 bit_0) /* subtract */,
+                            (i32 X) /* exchange */,
+                            (i32 tGPREven:$RdaSrc),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+              (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+                            (i32 tGPREven:$RdaSrc),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+    def : Pat<(i32 (int_arm_mve_vmldava_predicated
+                            (i32 VTI.Unsigned),
+                            (i32 bit_0) /* subtract */,
+                            (i32 X) /* exchange */,
+                            (i32 tGPREven:$RdaSrc),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            (VTI.Pred VCCR:$mask))),
+              (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+                            (i32 tGPREven:$RdaSrc),
+                            (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+                            ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+  }
 }
 
-multiclass MVE_VMLAMLSDAV_AX<string iname, string suffix, bit sz, bit bit_28,
-                             bit bit_8, bit bit_0, list<dag> pattern=[]> {
-  defm "" : MVE_VMLAMLSDAV_A<iname, "", suffix, sz, bit_28,
-                             0b0, bit_8, bit_0, pattern>;
-  defm "" : MVE_VMLAMLSDAV_A<iname, "x", suffix, sz, bit_28,
-                             0b1, bit_8, bit_0, pattern>;
+multiclass MVE_VMLAMLSDAV_AX<string iname, MVEVectorVTInfo VTI, bit sz,
+                             bit bit_28, bit bit_8, bit bit_0> {
+  defm "" : MVE_VMLAMLSDAV_A<iname, "", VTI, sz, bit_28,
+                             0b0, bit_8, bit_0>;
+  defm "" : MVE_VMLAMLSDAV_A<iname, "x", VTI, sz, bit_28,
+                             0b1, bit_8, bit_0>;
 }
 
-multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit bit_8,
-                             list<dag> pattern=[]> {
-  defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix,
-                              sz, 0b0, bit_8, 0b0, pattern>;
-  defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix,
-                             sz, 0b1, 0b0, bit_8, 0b0, pattern>;
+multiclass MVE_VMLADAV_multi<MVEVectorVTInfo SVTI, MVEVectorVTInfo UVTI,
+                             bit sz, bit bit_8> {
+  defm "" : MVE_VMLAMLSDAV_AX<"vmladav", SVTI,
+                              sz, 0b0, bit_8, 0b0>;
+  defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", UVTI,
+                             sz, 0b1, 0b0, bit_8, 0b0>;
 }
 
-multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
-                             list<dag> pattern=[]> {
-  defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix,
-                              sz, bit_28, 0b0, 0b1, pattern>;
+multiclass MVE_VMLSDAV_multi<MVEVectorVTInfo VTI, bit sz, bit bit_28> {
+  defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", VTI,
+                              sz, bit_28, 0b0, 0b1>;
 }
 
-defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v16s8, MVE_v16u8, 0b0, 0b1>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v8s16, MVE_v8u16, 0b0, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v4s32, MVE_v4u32, 0b1, 0b0>;
 
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
 
 // vmlav aliases vmladav
 foreach acc = ["", "a"] in {
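As a reading aid for the opcode tables above, here is an illustrative sketch (not code from the patch) of the indexing scheme SelectBaseMVE_VMLLDAV applies to OpcodesS/OpcodesU: each block of Stride entries covers the supported element widths, and the IsSub, IsExchange and IsAccum flags skip whole blocks. The helper name pickOpcode is hypothetical.

```cpp
// Illustrative sketch only; the pointer-bumping in the patch is equivalent
// to computing a single index into the table.
#include <cassert>
#include <cstddef>
#include <cstdint>

uint16_t pickOpcode(const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                    bool IsUnsigned, bool IsSub, bool IsExchange, bool IsAccum,
                    size_t Stride, size_t TySize) {
  assert(TySize < Stride && "TySize selects an element width within a block");
  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  // IsSub skips the 4*Stride vmlaldav-style entries, IsExchange skips the
  // 2*Stride non-exchange entries, IsAccum skips the Stride entries without
  // an accumulator; TySize then picks the element width.
  size_t Index = (4 * IsSub + 2 * IsExchange + IsAccum) * Stride + TySize;
  return Opcodes[Index];
}

// Example: the signed MVE_VMLSLDAVaxs32 form (subtract + exchange +
// accumulate, 32-bit lanes) with Stride = 2 lands at (4 + 2 + 1) * 2 + 1 == 15,
// the last entry of the 16-entry OpcodesS table in the vmlldava case above.
```

The same layout explains why the unsigned tables only need 2 * Stride entries: the subtracting and exchanging forms have no unsigned encodings, so OpcodesU holds four entries for vmlldava (Stride = 2) and two for vrmlldavha (Stride = 1).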

