author     Mikhail Maltsev <mikhail.maltsev@arm.com>   2019-12-13 13:17:29 +0000
committer  Mikhail Maltsev <mikhail.maltsev@arm.com>   2019-12-13 13:17:29 +0000
commit     99581fd4c8e12f5eca38e7cfc5992508a9bfe383 (patch)
tree       ef38e7222ec01a704bf419825756cab9bd7120b5 /llvm/lib
parent     1cc4b603ba79c8bf8f60790cff31f872e7d00142 (diff)
[ARM][MVE] Add vector reduction intrinsics with two vector operands
Summary:
This patch adds intrinsics for the following MVE instructions:
* VABAV
* VMLADAV, VMLSDAV
* VMLALDAV, VMLSLDAV
* VRMLALDAVH, VRMLSLDAVH

Each of the above 4 groups has a corresponding new LLVM IR intrinsic, since
the instructions cannot be easily represented using general-purpose IR
operations.

Reviewers: simon_tatham, ostannard, dmgreen, MarkMurrayARM

Reviewed By: MarkMurrayARM

Subscribers: merge_guards_bot, kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D71062
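As a rough illustration of how these operations surface at the source level, the sketch below assumes the corresponding ACLE intrinsics from <arm_mve.h> and the clang-side lowering handled in the same review; the exact names and the instructions each call compiles to may differ.

// Sketch only: assumes the ACLE MVE intrinsic names below and that clang
// lowers them to the new llvm.arm.mve.* intrinsics added by this series.
// Build with something like: -target arm-none-eabi -march=armv8.1-m.main+mve
#include <arm_mve.h>

uint32_t abs_diff_accum(uint32_t acc, int8x16_t a, int8x16_t b) {
  return vabavq_s8(acc, a, b);        // expected to select VABAV.S8
}

int32_t dot_s16(int16x8_t a, int16x8_t b) {
  return vmladavq_s16(a, b);          // expected to select VMLADAV.S16
}

int64_t long_dot_s32(int32x4_t a, int32x4_t b) {
  return vmlaldavq_s32(a, b);         // expected to select VMLALDAV.S32
}

int64_t rounding_high_dot_s32(int32x4_t a, int32x4_t b) {
  return vrmlaldavhq_s32(a, b);       // expected to select VRMLALDAVH.S32
}

int32_t dot_s16_pred(int16x8_t a, int16x8_t b, mve_pred16_t p) {
  return vmladavq_p_s16(a, b, p);     // expected to select VPST; VMLADAVT.S16
}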
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 147
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrMVE.td      | 144
2 files changed, 251 insertions, 40 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 6dd56b35d0a..acbbf20f3ef 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -237,6 +237,27 @@ private:
void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);
+ /// Select long MVE vector reductions with two vector operands
+ /// Stride is the number of vector element widths the instruction can operate
+ /// on:
+ /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
+ /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
+ /// Stride is used when addressing the OpcodesS array which contains multiple
+ /// opcodes for each element width.
+ /// TySize is the index into the list of element types listed above
+ void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
+ const uint16_t *OpcodesS, const uint16_t *OpcodesU,
+ size_t Stride, size_t TySize);
+
+ /// Select a 64-bit MVE vector reduction with two vector operands
+ /// arm_mve_vmlldava_[predicated]
+ void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU);
+ /// Select a 72-bit MVE vector rounding reduction with two vector operands
+ /// int_arm_mve_vrmlldavha[_predicated]
+ void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU);
+
/// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
/// should be 2 or 4. The opcode array specifies the instructions
/// used for 8, 16 and 32-bit lane sizes respectively, and each
@@ -2531,6 +2552,96 @@ void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
}
+static bool SDValueToConstBool(SDValue SDVal) {
+ assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
+ ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
+ uint64_t Value = SDValConstant->getZExtValue();
+ assert((Value == 0 || Value == 1) && "expected value 0 or 1");
+ return Value;
+}
+
+void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
+ const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU,
+ size_t Stride, size_t TySize) {
+ assert(TySize < Stride && "Invalid TySize");
+ bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
+ bool IsSub = SDValueToConstBool(N->getOperand(2));
+ bool IsExchange = SDValueToConstBool(N->getOperand(3));
+ if (IsUnsigned) {
+ assert(!IsSub &&
+ "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
+ assert(!IsExchange &&
+ "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
+ }
+
+ auto OpIsZero = [N](size_t OpNo) {
+ if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
+ if (OpConst->getZExtValue() == 0)
+ return true;
+ return false;
+ };
+
+ // If the input accumulator value is not zero, select an instruction with
+ // accumulator, otherwise select an instruction without accumulator
+ bool IsAccum = !(OpIsZero(4) && OpIsZero(5));
+
+ const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
+ if (IsSub)
+ Opcodes += 4 * Stride;
+ if (IsExchange)
+ Opcodes += 2 * Stride;
+ if (IsAccum)
+ Opcodes += Stride;
+ uint16_t Opcode = Opcodes[TySize];
+
+ SDLoc Loc(N);
+ SmallVector<SDValue, 8> Ops;
+ // Push the accumulator operands, if they are used
+ if (IsAccum) {
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(N->getOperand(5));
+ }
+ // Push the two vector operands
+ Ops.push_back(N->getOperand(6));
+ Ops.push_back(N->getOperand(7));
+
+ if (Predicated)
+ AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
+ else
+ AddEmptyMVEPredicateToOps(Ops, Loc);
+
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops));
+}
+
+void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
+ const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU) {
+ EVT VecTy = N->getOperand(6).getValueType();
+ size_t SizeIndex;
+ switch (VecTy.getVectorElementType().getSizeInBits()) {
+ case 16:
+ SizeIndex = 0;
+ break;
+ case 32:
+ SizeIndex = 1;
+ break;
+ default:
+ llvm_unreachable("bad vector element size");
+ }
+
+ SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
+}
+
+void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
+ const uint16_t *OpcodesS,
+ const uint16_t *OpcodesU) {
+ EVT VecTy = N->getOperand(6).getValueType();
+ assert(VecTy.getVectorElementType().getSizeInBits() == 32 &&
+ "bad vector element size");
+ SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
+}
+
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
const uint16_t *const *Opcodes) {
EVT VT = N->getValueType(0);
@@ -4376,6 +4487,42 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
IntNo == Intrinsic::arm_mve_vadc_predicated);
return;
+ case Intrinsic::arm_mve_vmlldava:
+ case Intrinsic::arm_mve_vmlldava_predicated: {
+ static const uint16_t OpcodesU[] = {
+ ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
+ ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
+ };
+ static const uint16_t OpcodesS[] = {
+ ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
+ ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
+ ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
+ ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
+ ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
+ ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
+ ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
+ ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
+ };
+ SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
+ OpcodesS, OpcodesU);
+ return;
+ }
+
+ case Intrinsic::arm_mve_vrmlldavha:
+ case Intrinsic::arm_mve_vrmlldavha_predicated: {
+ static const uint16_t OpcodesU[] = {
+ ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
+ };
+ static const uint16_t OpcodesS[] = {
+ ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
+ ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
+ ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
+ ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
+ };
+ SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
+ OpcodesS, OpcodesU);
+ return;
+ }
}
break;
}
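For readers following the opcode-table layout described in the comment on SelectBaseMVE_VMLLDAV, here is a standalone sketch (a hypothetical helper, not part of the patch) of the index computation that function performs. The signed table holds blocks of Stride opcodes in the order plain, 'a', 'x', 'ax', followed by the same four blocks for the subtracting (vmlsldav/vrmlsldavh) forms; the unsigned table holds only the plain and 'a' blocks, since unsigned subtracting/exchanging variants do not exist.

// Hypothetical helper mirroring the indexing in SelectBaseMVE_VMLLDAV.
#include <cstddef>
#include <cstdint>

static uint16_t PickVMLLDAVOpcode(const uint16_t *Opcodes, bool IsSub,
                                  bool IsExchange, bool IsAccum,
                                  size_t Stride, size_t TySize) {
  size_t Index = 0;
  if (IsSub)
    Index += 4 * Stride; // skip plain/a/x/ax blocks to reach the vmlsldav forms
  if (IsExchange)
    Index += 2 * Stride; // skip plain/a blocks to reach the 'x' variants
  if (IsAccum)
    Index += Stride;     // step from the plain block to the 'a' block
  return Opcodes[Index + TySize]; // TySize picks the element width (i16/i32)
}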
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 21f0d5e8679..a40231c4aa1 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -567,10 +567,10 @@ class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
let Inst{4} = 0b0;
}
-class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
+class MVE_VABAV<string suffix, bit U, bits<2> size>
: MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
- pattern> {
+ []> {
bits<4> Qm;
bits<4> Qn;
bits<4> Rda;
@@ -589,12 +589,36 @@ class MVE_VABAV<string suffix, bit U, bits<2> size, list<dag> pattern=[]>
let Inst{0} = 0b1;
}
-def MVE_VABAVs8 : MVE_VABAV<"s8", 0b0, 0b00>;
-def MVE_VABAVs16 : MVE_VABAV<"s16", 0b0, 0b01>;
-def MVE_VABAVs32 : MVE_VABAV<"s32", 0b0, 0b10>;
-def MVE_VABAVu8 : MVE_VABAV<"u8", 0b1, 0b00>;
-def MVE_VABAVu16 : MVE_VABAV<"u16", 0b1, 0b01>;
-def MVE_VABAVu32 : MVE_VABAV<"u32", 0b1, 0b10>;
+multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VABAV<VTI.Suffix, VTI.Unsigned, VTI.Size>;
+
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (int_arm_mve_vabav
+ (i32 VTI.Unsigned),
+ (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (!cast<Instruction>(NAME)
+ (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vabav_predicated
+ (i32 VTI.Unsigned),
+ (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (!cast<Instruction>(NAME)
+ (i32 rGPR:$Rda_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
+}
+
+defm MVE_VABAVs8 : MVE_VABAV_m<MVE_v16s8>;
+defm MVE_VABAVs16 : MVE_VABAV_m<MVE_v8s16>;
+defm MVE_VABAVs32 : MVE_VABAV_m<MVE_v4s32>;
+defm MVE_VABAVu8 : MVE_VABAV_m<MVE_v16u8>;
+defm MVE_VABAVu16 : MVE_VABAV_m<MVE_v8u16>;
+defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, bits<2> size, list<dag> pattern=[]>
@@ -803,10 +827,9 @@ defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 0b1>;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0b0>;
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
- bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]>
+ bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
: MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
- "$RdaDest, $Qn, $Qm", cstr, pattern> {
+ "$RdaDest, $Qn, $Qm", cstr, []> {
bits<4> RdaDest;
bits<3> Qm;
bits<3> Qn;
@@ -824,47 +847,88 @@ class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
let Inst{0} = bit_0;
}
-multiclass MVE_VMLAMLSDAV_A<string iname, string x, string suffix,
- bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]> {
- def ""#x#suffix : MVE_VMLAMLSDAV<iname # x, suffix,
+multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
+ bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
+ def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
(ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
- def "a"#x#suffix : MVE_VMLAMLSDAV<iname # "a" # x, suffix,
+ sz, bit_28, 0b0, X, bit_8, bit_0>;
+ def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
(ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
"$RdaDest = $RdaSrc",
- sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
+ sz, bit_28, 0b1, X, bit_8, bit_0>;
+ let Predicates = [HasMVEInt] in {
+ def : Pat<(i32 (int_arm_mve_vmldava
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 0) /* accumulator */,
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava_predicated
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 0) /* accumulator */,
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
+ (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))>;
+
+ def : Pat<(i32 (int_arm_mve_vmldava_predicated
+ (i32 VTI.Unsigned),
+ (i32 bit_0) /* subtract */,
+ (i32 X) /* exchange */,
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ (VTI.Pred VCCR:$mask))),
+ (i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
+ (i32 tGPREven:$RdaSrc),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ }
}
-multiclass MVE_VMLAMLSDAV_AX<string iname, string suffix, bit sz, bit bit_28,
- bit bit_8, bit bit_0, list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_A<iname, "", suffix, sz, bit_28,
- 0b0, bit_8, bit_0, pattern>;
- defm "" : MVE_VMLAMLSDAV_A<iname, "x", suffix, sz, bit_28,
- 0b1, bit_8, bit_0, pattern>;
+multiclass MVE_VMLAMLSDAV_AX<string iname, MVEVectorVTInfo VTI, bit sz,
+ bit bit_28, bit bit_8, bit bit_0> {
+ defm "" : MVE_VMLAMLSDAV_A<iname, "", VTI, sz, bit_28,
+ 0b0, bit_8, bit_0>;
+ defm "" : MVE_VMLAMLSDAV_A<iname, "x", VTI, sz, bit_28,
+ 0b1, bit_8, bit_0>;
}
-multiclass MVE_VMLADAV_multi<string suffix, bit sz, bit bit_8,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_AX<"vmladav", "s"#suffix,
- sz, 0b0, bit_8, 0b0, pattern>;
- defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", "u"#suffix,
- sz, 0b1, 0b0, bit_8, 0b0, pattern>;
+multiclass MVE_VMLADAV_multi<MVEVectorVTInfo SVTI, MVEVectorVTInfo UVTI,
+ bit sz, bit bit_8> {
+ defm "" : MVE_VMLAMLSDAV_AX<"vmladav", SVTI,
+ sz, 0b0, bit_8, 0b0>;
+ defm "" : MVE_VMLAMLSDAV_A<"vmladav", "", UVTI,
+ sz, 0b1, 0b0, bit_8, 0b0>;
}
-multiclass MVE_VMLSDAV_multi<string suffix, bit sz, bit bit_28,
- list<dag> pattern=[]> {
- defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", "s"#suffix,
- sz, bit_28, 0b0, 0b1, pattern>;
+multiclass MVE_VMLSDAV_multi<MVEVectorVTInfo VTI, bit sz, bit bit_28> {
+ defm "" : MVE_VMLAMLSDAV_AX<"vmlsdav", VTI,
+ sz, bit_28, 0b0, 0b1>;
}
-defm MVE_VMLADAV : MVE_VMLADAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLADAV : MVE_VMLADAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v16s8, MVE_v16u8, 0b0, 0b1>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v8s16, MVE_v8u16, 0b0, 0b0>;
+defm MVE_VMLADAV : MVE_VMLADAV_multi<MVE_v4s32, MVE_v4u32, 0b1, 0b0>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi< "8", 0b0, 0b1>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"16", 0b0, 0b0>;
-defm MVE_VMLSDAV : MVE_VMLSDAV_multi<"32", 0b1, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v16s8, 0b0, 0b1>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v8s16, 0b0, 0b0>;
+defm MVE_VMLSDAV : MVE_VMLSDAV_multi<MVE_v4s32, 0b1, 0b0>;
// vmlav aliases vmladav
foreach acc = ["", "a"] in {