diff options
| author | Michael Kuperstein <michael.m.kuperstein@intel.com> | 2014-12-17 12:32:17 +0000 |
|---|---|---|
| committer | Michael Kuperstein <michael.m.kuperstein@intel.com> | 2014-12-17 12:32:17 +0000 |
| commit | 047b1a0400fe75e5d6017c287383153beb5f29eb (patch) | |
| tree | 8e3d505d8d07fee1b57b1bd30e7100fc46e9ba26 /llvm/lib | |
| parent | 9941195a9ff05d1f29955473e0f50655803f8e92 (diff) | |
| download | bcm5719-llvm-047b1a0400fe75e5d6017c287383153beb5f29eb.tar.gz bcm5719-llvm-047b1a0400fe75e5d6017c287383153beb5f29eb.zip | |
[DAGCombine] Slightly improve lowering of BUILD_VECTOR into a shuffle.
This handles the case of a BUILD_VECTOR being constructed out of elements extracted from a vector twice the size of the result vector. Previously this was always scalarized. Now, we try to construct a shuffle node that feeds on extract_subvectors.
This fixes PR15872 and provides a partial fix for PR21711.
Differential Revision: http://reviews.llvm.org/D6678
llvm-svn: 224429
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 |
3 files changed, 34 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ed3d06cc920..8297e841469 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10783,9 +10783,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { SDValue ExtVal = Extract.getOperand(1); unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); if (Extract.getOperand(0) == VecIn1) { - if (ExtIndex > VT.getVectorNumElements()) - return SDValue(); - Mask.push_back(ExtIndex); continue; } @@ -10805,20 +10802,34 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (VecIn2.getNode()) return SDValue(); - // We only support widening of vectors which are half the size of the - // output registers. For example XMM->YMM widening on X86 with AVX. - if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) - return SDValue(); - // If the input vector type has a different base type to the output // vector type, bail out. if (VecIn1.getValueType().getVectorElementType() != VT.getVectorElementType()) return SDValue(); - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + // If the input vector is too small, widen it. + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + EVT VecInT = VecIn1.getValueType(); + if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { + // Widen the input vector by adding undef values. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, + VecIn1, DAG.getUNDEF(VecIn1.getValueType())); + } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { + // If the input vector is too large, try to split it. + if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) + return SDValue(); + + // Try to replace VecIn1 with two extract_subvectors + // No need to update the masks, they should still be correct. + VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); + VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, TLI.getVectorIdxTy())); + UsesZeroVector = false; + } else + return SDValue(); } if (UsesZeroVector) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2678ca645fd..6a6b20e81d9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3851,6 +3851,14 @@ bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } +bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, + unsigned Index) const { + if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) + return false; + + return (Index == 0 || Index == ResVT.getVectorNumElements()); +} + /// isUndefOrInRange - Return true if Val is undef or if its value falls within /// the specified range (L, H]. static bool isUndefOrInRange(int Val, int Low, int Hi) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index b793171e2c5..c5e3e14c545 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -791,6 +791,10 @@ namespace llvm { bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + /// Return true if EXTRACT_SUBVECTOR is cheap for this result type + /// with this index. + bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override; + /// Intel processors have a unified instruction and data cache const char * getClearCacheBuiltinName() const override { return nullptr; // nothing to do, move along. |

