summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AArch64
diff options
context:
space:
mode:
authorCullen Rhodes <cullen.rhodes@arm.com>2019-12-04 14:20:33 +0000
committerCullen Rhodes <cullen.rhodes@arm.com>2019-12-20 11:58:00 +0000
commit974f00a4369371fae9d25477753c0f68f331e05a (patch)
treea75c90d3eb77b65f1ab37533d21b115fff648bc8 /llvm/lib/Target/AArch64
parentb2371791fc74b2ecad7f608ff8592ec512d098e6 (diff)
downloadbcm5719-llvm-974f00a4369371fae9d25477753c0f68f331e05a.tar.gz
bcm5719-llvm-974f00a4369371fae9d25477753c0f68f331e05a.zip
[AArch64][SVE] Fold constant multiply of element count
Summary: E.g. %0 = tail call i64 @llvm.aarch64.sve.cntw(i32 31) %mul = mul i64 %0, <const> Should emit: cntw x0, all, mul #<const> For <const> in the range 1-16. Patch by Kerry McLaughlin Reviewers: sdesmalen, huntergr, dancgr, rengolin, efriedma Reviewed By: sdesmalen Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71014
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp22
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp23
-rw-r--r--llvm/lib/Target/AArch64/SVEInstrFormats.td10
3 files changed, 54 insertions, 1 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index e875844ed70..ef06993d618 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -169,6 +169,28 @@ public:
return SelectSVELogicalImm(N, VT, Imm);
}
+ /// Selects the immediate multiplier of a CNT/INC/DEC/RDVL instruction used to
+ /// calculate VSCALE*N.
+ ///
+ /// \tparam Min   Smallest multiplier accepted (after dividing by Scale).
+ /// \tparam Max   Largest multiplier accepted (after dividing by Scale).
+ /// \tparam Scale Non-zero divisor applied to the constant; the constant must
+ ///               be an exact multiple of |Scale|.
+ /// \tparam Shift When true, the constant is a shift amount and the value that
+ ///               gets scaled is 1 << constant.
+ /// \param N      Candidate operand; only a ConstantSDNode can match.
+ /// \param Imm    On success, receives the multiplier as an i32 target constant.
+ /// \returns true if a multiplier in the range [Min, Max] was produced.
+ template<signed Min, signed Max, signed Scale, bool Shift>
+ bool SelectCntImm(SDValue N, SDValue &Imm) {
+   static_assert(Scale != 0, "Scale must be non-zero");
+
+   if (!isa<ConstantSDNode>(N))
+     return false;
+
+   int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
+   if (Shift) {
+     // A negative shift amount, or one that reaches the sign bit, has no
+     // representable int64_t result, so it can never be a valid multiplier.
+     if (MulImm < 0 || MulImm >= 63)
+       return false;
+     // Shift a 64-bit one: a plain `1` is an int, and shifting an int by its
+     // width or more is undefined behaviour.
+     MulImm = int64_t(1) << MulImm;
+   }
+
+   if ((MulImm % std::abs(Scale)) != 0)
+     return false;
+
+   MulImm /= Scale;
+   if ((MulImm >= Min) && (MulImm <= Max)) {
+     Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
+     return true;
+   }
+
+   return false;
+ }
+
/// Form sequences of consecutive 64/128-bit registers for use in NEON
/// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
/// between 1 and 4 elements. If it contains a single element that is returned
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7f9a7bd9746..a3dd2e65a12 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9541,6 +9541,19 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
+/// Returns true when the intrinsic ID of \p S is one of the SVE element-count
+/// intrinsics (aarch64_sve_cntb, cnth, cntw or cntd), and false otherwise.
+static bool IsSVECntIntrinsic(SDValue S) {
+  const auto IID = getIntrinsicID(S.getNode());
+  return IID == Intrinsic::aarch64_sve_cntb ||
+         IID == Intrinsic::aarch64_sve_cnth ||
+         IID == Intrinsic::aarch64_sve_cntw ||
+         IID == Intrinsic::aarch64_sve_cntd;
+}
+
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
@@ -9551,9 +9564,18 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (!isa<ConstantSDNode>(N->getOperand(1)))
return SDValue();
+ SDValue N0 = N->getOperand(0);
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
const APInt &ConstValue = C->getAPIntValue();
+ // Allow the scaling to be folded into the `cnt` instruction by preventing
+ // the scaling from being obscured here. This makes it easier to pattern match.
+ if (IsSVECntIntrinsic(N0) ||
+ (N0->getOpcode() == ISD::TRUNCATE &&
+ (IsSVECntIntrinsic(N0->getOperand(0)))))
+ if (ConstValue.sge(1) && ConstValue.sle(16))
+ return SDValue();
+
// Multiplication of a power of two plus/minus one can be done more
// cheaply as shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
@@ -9564,7 +9586,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// e.g. 6=3*2=(2+1)*2.
// TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
// which equals to (1+2)*16-(1+2).
- SDValue N0 = N->getOperand(0);
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
unsigned TrailingZeroes = ConstValue.countTrailingZeros();
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 0a3df4f2b71..764ff99a1dd 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -244,6 +244,10 @@ def sve_incdec_imm : Operand<i32>, TImmLeaf<i32, [{
let DecoderMethod = "DecodeSVEIncDecImm";
}
+// This allows i32 immediate extraction from i64 based arithmetic.
+// Both patterns are matched by SelectCntImm<Min, Max, Scale, Shift> with
+// Min = 1, Max = 16 and Scale = 1, so only multipliers in the range 1-16 are
+// accepted:
+//  - sve_cnt_mul_imm (Shift = false): the constant is the multiplier itself.
+//  - sve_cnt_shl_imm (Shift = true): the constant is a shift amount and the
+//    multiplier is 1 << constant.
+def sve_cnt_mul_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">;
+def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">;
+
//===----------------------------------------------------------------------===//
// SVE PTrue - These are used extensively throughout the pattern matching so
// it's important we define them first.
@@ -635,6 +639,12 @@ multiclass sve_int_count<bits<3> opc, string asm, SDPatternOperator op> {
def : InstAlias<asm # "\t$Rd",
(!cast<Instruction>(NAME) GPR64:$Rd, 0b11111, 1), 2>;
+ def : Pat<(i64 (mul (op sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm))),
+ (!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>;
+
+ def : Pat<(i64 (shl (op sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm)))),
+ (!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>;
+
def : Pat<(i64 (op sve_pred_enum:$pattern)),
(!cast<Instruction>(NAME) sve_pred_enum:$pattern, 1)>;
}
OpenPOWER on IntegriCloud