diff options
author | Sanjay Patel <spatel@rotateright.com> | 2018-10-21 20:13:29 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2018-10-21 20:13:29 +0000 |
commit | e439cc2745323d2ac0e61de1e04efa0098f4b1a8 (patch) | |
tree | 6365076050c6dc131a967da4aec0508cd91b0f4b /llvm/lib/CodeGen/SelectionDAG | |
parent | e967a12733565fff0beb16865bd21e381b75b250 (diff) | |
download | bcm5719-llvm-e439cc2745323d2ac0e61de1e04efa0098f4b1a8.tar.gz bcm5719-llvm-e439cc2745323d2ac0e61de1e04efa0098f4b1a8.zip |
[DAGCombiner] reduce insert+bitcast+extract vector ops to truncate (PR39016)
This is a late backend subset of the IR transform added with:
D52439
We can confirm that the conversion to a 'trunc' is correct by running:
$ opt -instcombine -data-layout="e"
(assuming the IR transforms are correct; change "e" to "E" for big-endian)
As discussed in PR39016:
https://bugs.llvm.org/show_bug.cgi?id=39016
...the pattern may emerge during legalization, so that's why we are waiting for an
insertelement to become a scalar_to_vector in the pattern matching here.
The DAG allows for fun variations that are not possible in IR. Result types for
extracts and scalar_to_vector don't necessarily match input types, so that means
we have to be a bit more careful in the transform (see code comments).
The tests show that we don't handle cases that require a shift (as we did in the
IR version). I've left that as a potential follow-up because I'm not sure if
that's a real concern at this late stage.
Differential Revision: https://reviews.llvm.org/D53201
llvm-svn: 344872
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 |
1 files changed, 29 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 11cc699ffe1..381efb9cb94 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -15503,16 +15503,41 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. } - if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST) { + // TODO: These transforms should not require the 'hasOneUse' restriction, but + // there are regressions on multiple targets without it. We can end up with a + // mess of scalar and vector code if we reduce only part of the DAG to scalar. + if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && VT.isInteger() && + InVec.hasOneUse()) { // The vector index of the LSBs of the source depend on the endian-ness. bool IsLE = DAG.getDataLayout().isLittleEndian(); - + unsigned ExtractIndex = ConstEltNo->getZExtValue(); // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x) unsigned BCTruncElt = IsLE ? 0 : VT.getVectorNumElements() - 1; SDValue BCSrc = InVec.getOperand(0); - if (InVec.hasOneUse() && ConstEltNo->getZExtValue() == BCTruncElt && - VT.isInteger() && BCSrc.getValueType().isScalarInteger()) + if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); + + if (LegalTypes && BCSrc.getValueType().isInteger() && + BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt --> + // trunc i64 X to i32 + SDValue X = BCSrc.getOperand(0); + assert(X.getValueType().isScalarInteger() && NVT.isScalarInteger() && + "Extract element and scalar to vector can't change element type " + "from FP to integer."); + unsigned XBitWidth = X.getValueSizeInBits(); + unsigned VecEltBitWidth = VT.getScalarSizeInBits(); + BCTruncElt = IsLE ? 
0 : XBitWidth / VecEltBitWidth - 1; + + // An extract element return value type can be wider than its vector + // operand element type. In that case, the high bits are undefined, so + // it's possible that we may need to extend rather than truncate. + if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) { + assert(XBitWidth % VecEltBitWidth == 0 && + "Scalar bitwidth must be a multiple of vector element bitwidth"); + return DAG.getAnyExtOrTrunc(X, SDLoc(N), NVT); + } + } } // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val |