diff options
author | Cullen Rhodes <cullen.rhodes@arm.com> | 2019-12-04 14:20:33 +0000 |
---|---|---|
committer | Cullen Rhodes <cullen.rhodes@arm.com> | 2019-12-20 11:58:00 +0000 |
commit | 974f00a4369371fae9d25477753c0f68f331e05a (patch) | |
tree | a75c90d3eb77b65f1ab37533d21b115fff648bc8 /llvm/lib/Target/AArch64 | |
parent | b2371791fc74b2ecad7f608ff8592ec512d098e6 (diff) | |
download | bcm5719-llvm-974f00a4369371fae9d25477753c0f68f331e05a.tar.gz bcm5719-llvm-974f00a4369371fae9d25477753c0f68f331e05a.zip |
[AArch64][SVE] Fold constant multiply of element count
Summary:
E.g. the following IR:
%0 = tail call i64 @llvm.aarch64.sve.cntw(i32 31)
%mul = mul i64 %0, <const>
should emit a single instruction:
cntw x0, all, mul #<const>
for <const> in the range 1-16 (inclusive).
Patch by Kerry McLaughlin
Reviewers: sdesmalen, huntergr, dancgr, rengolin, efriedma
Reviewed By: sdesmalen
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71014
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 22 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/SVEInstrFormats.td | 10 |
3 files changed, 54 insertions, 1 deletion
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index e875844ed70..ef06993d618 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -169,6 +169,28 @@ public: return SelectSVELogicalImm(N, VT, Imm); } + // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. + template<signed Min, signed Max, signed Scale, bool Shift> + bool SelectCntImm(SDValue N, SDValue &Imm) { + if (!isa<ConstantSDNode>(N)) + return false; + + int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); + if (Shift) + MulImm = 1 << MulImm; + + if ((MulImm % std::abs(Scale)) != 0) + return false; + + MulImm /= Scale; + if ((MulImm >= Min) && (MulImm <= Max)) { + Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32); + return true; + } + + return false; + } + /// Form sequences of consecutive 64/128-bit registers for use in NEON /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have /// between 1 and 4 elements. 
If it contains a single element that is returned diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 7f9a7bd9746..a3dd2e65a12 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9541,6 +9541,19 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); } +static bool IsSVECntIntrinsic(SDValue S) { + switch(getIntrinsicID(S.getNode())) { + default: + break; + case Intrinsic::aarch64_sve_cntb: + case Intrinsic::aarch64_sve_cnth: + case Intrinsic::aarch64_sve_cntw: + case Intrinsic::aarch64_sve_cntd: + return true; + } + return false; +} + static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { @@ -9551,9 +9564,18 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, if (!isa<ConstantSDNode>(N->getOperand(1))) return SDValue(); + SDValue N0 = N->getOperand(0); ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1)); const APInt &ConstValue = C->getAPIntValue(); + // Allow the scaling to be folded into the `cnt` instruction by preventing + // the scaling to be obscured here. This makes it easier to pattern match. + if (IsSVECntIntrinsic(N0) || + (N0->getOpcode() == ISD::TRUNCATE && + (IsSVECntIntrinsic(N0->getOperand(0))))) + if (ConstValue.sge(1) && ConstValue.sle(16)) + return SDValue(); + // Multiplication of a power of two plus/minus one can be done more // cheaply as as shift+add/sub. For now, this is true unilaterally. If // future CPUs have a cheaper MADD instruction, this may need to be @@ -9564,7 +9586,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, // e.g. 6=3*2=(2+1)*2. // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45 // which equals to (1+2)*16-(1+2). 
- SDValue N0 = N->getOperand(0); // TrailingZeroes is used to test if the mul can be lowered to // shift+add+shift. unsigned TrailingZeroes = ConstValue.countTrailingZeros(); diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 0a3df4f2b71..764ff99a1dd 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -244,6 +244,10 @@ def sve_incdec_imm : Operand<i32>, TImmLeaf<i32, [{ let DecoderMethod = "DecodeSVEIncDecImm"; } +// This allows i32 immediate extraction from i64 based arithmetic. +def sve_cnt_mul_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">; +def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">; + //===----------------------------------------------------------------------===// // SVE PTrue - These are used extensively throughout the pattern matching so // it's important we define them first. @@ -635,6 +639,12 @@ multiclass sve_int_count<bits<3> opc, string asm, SDPatternOperator op> { def : InstAlias<asm # "\t$Rd", (!cast<Instruction>(NAME) GPR64:$Rd, 0b11111, 1), 2>; + def : Pat<(i64 (mul (op sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm))), + (!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>; + + def : Pat<(i64 (shl (op sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm)))), + (!cast<Instruction>(NAME) sve_pred_enum:$pattern, sve_incdec_imm:$imm)>; + def : Pat<(i64 (op sve_pred_enum:$pattern)), (!cast<Instruction>(NAME) sve_pred_enum:$pattern, 1)>; } |