diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 49 |
1 files changed, 29 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 082d1bafddb..14e54f0e4e2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29652,7 +29652,8 @@ static bool matchBinaryPermuteVectorShuffle( /// instruction but should only be used to replace chains over a certain depth. static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth, - bool HasVariableMask, SelectionDAG &DAG, + bool HasVariableMask, + bool AllowVariableMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!"); assert((Inputs.size() == 1 || Inputs.size() == 2) && @@ -29865,7 +29866,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // Depth threshold above which we can efficiently use variable mask shuffles. int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3; - bool AllowVariableMask = (Depth >= VariableShuffleDepth) || HasVariableMask; + AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask; bool MaskContainsZeros = any_of(Mask, [](int M) { return M == SM_SentinelZero; }); @@ -30199,7 +30200,8 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops, static SDValue combineX86ShufflesRecursively( ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root, ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth, - bool HasVariableMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { + bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. const unsigned MaxRecursionDepth = 8; @@ -30354,18 +30356,23 @@ static SDValue combineX86ShufflesRecursively( CombinedNodes.push_back(Op.getNode()); // See if we can recurse into each shuffle source op (if it's a target - // shuffle). The source op should only be combined if it either has a - // single use (i.e. current Op) or all its users have already been combined. + // shuffle). The source op should only be generally combined if it either has + // a single use (i.e. current Op) or all its users have already been combined, + // if not then we can still combine but should prevent generation of variable + // shuffles to avoid constant pool bloat. // Don't recurse if we already have more source ops than we can combine in // the remaining recursion depth. if (Ops.size() < (MaxRecursionDepth - Depth)) { - for (int i = 0, e = Ops.size(); i < e; ++i) + for (int i = 0, e = Ops.size(); i < e; ++i) { + bool AllowVar = false; if (Ops[i].getNode()->hasOneUse() || SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) - if (SDValue Res = combineX86ShufflesRecursively( - Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask, - DAG, Subtarget)) - return Res; + AllowVar = AllowVariableMask; + if (SDValue Res = combineX86ShufflesRecursively( + Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask, + AllowVar, DAG, Subtarget)) + return Res; + } } // Attempt to constant fold all of the constant source ops. @@ -30395,8 +30402,8 @@ static SDValue combineX86ShufflesRecursively( } // Finally, try to combine into a single shuffle instruction. - return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG, - Subtarget); + return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, + AllowVariableMask, DAG, Subtarget); } /// Get the PSHUF-style mask from PSHUF node. @@ -30697,7 +30704,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, DemandedMask[i] = i; if (SDValue Res = combineX86ShufflesRecursively( {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); } @@ -31316,7 +31323,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // a particular chain. if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } @@ -34223,7 +34230,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, + /*AllowVarMask*/ true, DAG, Subtarget)) return Res; return SDValue(); @@ -34283,7 +34291,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } @@ -34322,7 +34330,8 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, + /*AllowVarMask*/ true, DAG, Subtarget)) return Res; return SDValue(); @@ -34848,7 +34857,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } @@ -34885,7 +34894,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue Shuffle = combineX86ShufflesRecursively( {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, N->getOperand(0).getOperand(1)); } @@ -37419,7 +37428,7 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } |

