diff options
author | Sanjay Patel <spatel@rotateright.com> | 2018-10-30 14:14:34 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2018-10-30 14:14:34 +0000 |
commit | 8b207defeac47d7394cb1e68011b031729d77791 (patch) | |
tree | 6b6f220b1e4f0187c3a3167900b5eba74acc074c /llvm/lib/CodeGen/SelectionDAG | |
parent | bd74554f540a20ef6d3a95e1cd38dd4593f53cb1 (diff) | |
download | bcm5719-llvm-8b207defeac47d7394cb1e68011b031729d77791.tar.gz bcm5719-llvm-8b207defeac47d7394cb1e68011b031729d77791.zip |
[DAGCombiner] narrow vector binops when extraction is cheap
Narrowing vector binops came up in the demanded bits discussion in D52912.
I don't think we're going to be able to do this transform in IR as a canonicalization
because of the risk of creating unsupported widths for vector ops, but we already have
a DAG TLI hook to allow what I was hoping for: isExtractSubvectorCheap(). That hook is
currently enabled for x86, ARM, and AArch64 (although only x86 has existing regression
test diffs).
The transform is artificially limited to not look through bitcasts because there are
already so many test diffs; that limitation is marked with a TODO, and lifting it is a small follow-up.
Differential Revision: https://reviews.llvm.org/D53784
llvm-svn: 345602
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41 |
1 files changed, 30 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 64c7dca0f6e..742ca02a03d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16673,10 +16673,8 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } -/// If we are extracting a subvector produced by a wide binary operator with at -/// at least one operand that was the result of a vector concatenation, then try -/// to use the narrow vector operands directly to avoid the concatenation and -/// extraction. +/// If we are extracting a subvector produced by a wide binary operator try +/// to use a narrow binary operator and/or avoid concatenation and extraction. static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share // some of these bailouts with other transforms. @@ -16697,22 +16695,43 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { if (!WideBVT.isVector()) return SDValue(); + EVT VT = Extract->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + unsigned ExtractIndex = ExtractIndexC->getZExtValue(); + assert(ExtractIndex % NumElems == 0 && + "Extract index is not a multiple of the vector length."); + EVT SrcVT = Extract->getOperand(0).getValueType(); + unsigned NumSrcElems = SrcVT.getVectorNumElements(); + unsigned NarrowingRatio = NumSrcElems / NumElems; + // Bail out if the target does not support a narrower version of the binop. 
unsigned BOpcode = BinOp.getOpcode(); + unsigned WideNumElts = WideBVT.getVectorNumElements(); EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), - WideBVT.getVectorNumElements() / 2); + WideNumElts / NarrowingRatio); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) return SDValue(); + // If extraction is cheap, we don't need to look at the binop operands + // for concat ops. The narrow binop alone makes this transform profitable. + // TODO: We're not dealing with the bitcasted pattern here. That limitation + // should be lifted. + if (Extract->getOperand(0) == BinOp && BinOp.hasOneUse() && + TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtractIndex)) { + // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N) + SDLoc DL(Extract); + SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(0), Extract->getOperand(1)); + SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(1), Extract->getOperand(1)); + return DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, + BinOp.getNode()->getFlags()); + } + // Only handle the case where we are doubling and then halving. A larger ratio // may require more than two narrow binops to replace the wide binop. - EVT VT = Extract->getValueType(0); - unsigned NumElems = VT.getVectorNumElements(); - unsigned ExtractIndex = ExtractIndexC->getZExtValue(); - assert(ExtractIndex % NumElems == 0 && - "Extract index is not a multiple of the vector length."); - if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2) + if (NarrowingRatio != 2) return SDValue(); // TODO: The motivating case for this transform is an x86 AVX1 target. That |