diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 88 |
1 file changed, 49 insertions, 39 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index df9d5bc7c98..e316b7bda13 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5075,12 +5075,6 @@ static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128); } -static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, - SelectionDAG &DAG, const SDLoc &dl) { - assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!"); - return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256); -} - /// Widen a vector to a larger size with the same scalar type, with the new /// elements either zero or undef. static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements, @@ -5291,24 +5285,6 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); } -/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128 -/// instructions. This is used because creating CONCAT_VECTOR nodes of -/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower -/// large BUILD_VECTORS. 
-static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT, - unsigned NumElems, SelectionDAG &DAG, - const SDLoc &dl) { - SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); - return insert128BitVector(V, V2, NumElems / 2, DAG, dl); -} - -static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT, - unsigned NumElems, SelectionDAG &DAG, - const SDLoc &dl) { - SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl); - return insert256BitVector(V, V2, NumElems / 2, DAG, dl); -} - static SDValue concatSubVectors(SDValue V1, SDValue V2, EVT VT, unsigned NumElems, SelectionDAG &DAG, const SDLoc &dl, unsigned VectorWidth) { @@ -8609,30 +8585,63 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. -static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { SDLoc dl(Op); MVT ResVT = Op.getSimpleValueType(); assert((ResVT.is256BitVector() || ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - unsigned NumElems = ResVT.getVectorNumElements(); - if (ResVT.is256BitVector()) - return concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); + unsigned NumOperands = Op.getNumOperands(); + unsigned NumZero = 0; + unsigned NumNonZero = 0; + unsigned NonZeros = 0; + for (unsigned i = 0; i != NumOperands; ++i) { + SDValue SubVec = Op.getOperand(i); + if (SubVec.isUndef()) + continue; + if (ISD::isBuildVectorAllZeros(SubVec.getNode())) + ++NumZero; + else { + assert(i < sizeof(NonZeros) * CHAR_BIT); // Ensure the shift is in range. + NonZeros |= 1 << i; + ++NumNonZero; + } + } + + // If there are zero or one non-zeros we can handle this very simply. + if (NumNonZero <= 1) { + SDValue Vec = NumZero ? 
getZeroVector(ResVT, Subtarget, DAG, dl) + : DAG.getUNDEF(ResVT); + if (!NumNonZero) + return Vec; + unsigned Idx = countTrailingZeros(NonZeros); + SDValue SubVec = Op.getOperand(Idx); + unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec, + DAG.getIntPtrConstant(Idx * SubVecNumElts, dl)); + } - if (Op.getNumOperands() == 4) { + if (NumOperands > 2) { MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); - SDValue V3 = Op.getOperand(2); - SDValue V4 = Op.getOperand(3); - return concat256BitVectors( - concat128BitVectors(V1, V2, HalfVT, NumElems / 2, DAG, dl), - concat128BitVectors(V3, V4, HalfVT, NumElems / 2, DAG, dl), ResVT, - NumElems, DAG, dl); + ArrayRef<SDUse> Ops = Op->ops(); + SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, + Ops.slice(0, NumOperands/2)); + SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, + Ops.slice(NumOperands/2)); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } - return concat256BitVectors(V1, V2, ResVT, NumElems, DAG, dl); + + assert(NumNonZero == 2 && "Simple cases not handled?"); + + SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, + DAG.getUNDEF(ResVT), Op.getOperand(0), + DAG.getIntPtrConstant(0, dl)); + unsigned NumElems = ResVT.getVectorNumElements(); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), + DAG.getIntPtrConstant(NumElems/2, dl)); } // Return true if all the operands of the given CONCAT_VECTORS node are zeros @@ -8689,6 +8698,7 @@ static SDValue isTypePromotionOfi1ZeroUpBits(SDValue Op) { return SDValue(); } +// TODO: Merge this with LowerAVXCONCAT_VECTORS? static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG & DAG) { @@ -8775,7 +8785,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, // from two other 128-bit ones. 
// 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors - return LowerAVXCONCAT_VECTORS(Op, DAG); + return LowerAVXCONCAT_VECTORS(Op, DAG, Subtarget); } //===----------------------------------------------------------------------===// |