diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-09 12:30:02 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-09 12:30:02 +0000 |
| commit | 01ae462fef748212bfe42c2555bc3eb1f4309d0f (patch) | |
| tree | 1a211d8b95013ae17c8f0c57a2a438c83b142a62 /llvm/lib | |
| parent | bf7f18b79c1315eb482a580a9be81e3aa7dd55ac (diff) | |
| download | bcm5719-llvm-01ae462fef748212bfe42c2555bc3eb1f4309d0f.tar.gz bcm5719-llvm-01ae462fef748212bfe42c2555bc3eb1f4309d0f.zip | |
[X86][SSE] Combine (some) target shuffles with multiple uses
As discussed on D41794, there are many cases where we fail to combine shuffles because the input operands have other uses.
This patch permits these shuffles to be combined as long as doing so doesn't introduce additional variable shuffle masks; this should reduce instruction dependencies and still allow the total number of shuffles to drop, without increasing the size of the constant pool.
However, this may mean that some memory folds no longer occur, and pre-AVX targets may occasionally require an extra register move.
This also exposes some poor PMULDQ/PMULUDQ codegen that performs unnecessary upper/lower calculations which will in fact fold to zero/undef - the fix will be added in a follow-up commit.
Differential Revision: https://reviews.llvm.org/D50328
llvm-svn: 339335
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 49 |
1 files changed, 29 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 082d1bafddb..14e54f0e4e2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29652,7 +29652,8 @@ static bool matchBinaryPermuteVectorShuffle( /// instruction but should only be used to replace chains over a certain depth. static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth, - bool HasVariableMask, SelectionDAG &DAG, + bool HasVariableMask, + bool AllowVariableMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!"); assert((Inputs.size() == 1 || Inputs.size() == 2) && @@ -29865,7 +29866,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // Depth threshold above which we can efficiently use variable mask shuffles. int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3; - bool AllowVariableMask = (Depth >= VariableShuffleDepth) || HasVariableMask; + AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask; bool MaskContainsZeros = any_of(Mask, [](int M) { return M == SM_SentinelZero; }); @@ -30199,7 +30200,8 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops, static SDValue combineX86ShufflesRecursively( ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root, ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth, - bool HasVariableMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { + bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. 
const unsigned MaxRecursionDepth = 8; @@ -30354,18 +30356,23 @@ static SDValue combineX86ShufflesRecursively( CombinedNodes.push_back(Op.getNode()); // See if we can recurse into each shuffle source op (if it's a target - // shuffle). The source op should only be combined if it either has a - // single use (i.e. current Op) or all its users have already been combined. + // shuffle). The source op should only be generally combined if it either has + // a single use (i.e. current Op) or all its users have already been combined, + // if not then we can still combine but should prevent generation of variable + // shuffles to avoid constant pool bloat. // Don't recurse if we already have more source ops than we can combine in // the remaining recursion depth. if (Ops.size() < (MaxRecursionDepth - Depth)) { - for (int i = 0, e = Ops.size(); i < e; ++i) + for (int i = 0, e = Ops.size(); i < e; ++i) { + bool AllowVar = false; if (Ops[i].getNode()->hasOneUse() || SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) - if (SDValue Res = combineX86ShufflesRecursively( - Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask, - DAG, Subtarget)) - return Res; + AllowVar = AllowVariableMask; + if (SDValue Res = combineX86ShufflesRecursively( + Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask, + AllowVar, DAG, Subtarget)) + return Res; + } } // Attempt to constant fold all of the constant source ops. @@ -30395,8 +30402,8 @@ static SDValue combineX86ShufflesRecursively( } // Finally, try to combine into a single shuffle instruction. - return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG, - Subtarget); + return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, + AllowVariableMask, DAG, Subtarget); } /// Get the PSHUF-style mask from PSHUF node. 
@@ -30697,7 +30704,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, DemandedMask[i] = i; if (SDValue Res = combineX86ShufflesRecursively( {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); } @@ -31316,7 +31323,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // a particular chain. if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } @@ -34223,7 +34230,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, + /*AllowVarMask*/ true, DAG, Subtarget)) return Res; return SDValue(); @@ -34283,7 +34291,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } @@ -34322,7 +34330,8 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, + /*AllowVarMask*/ true, DAG, Subtarget)) return Res; return SDValue(); @@ -34848,7 +34857,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } @@ -34885,7 +34894,7 @@ static 
SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (SDValue Shuffle = combineX86ShufflesRecursively( {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, N->getOperand(0).getOperand(1)); } @@ -37419,7 +37428,7 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, SDValue Op(N, 0); if (SDValue Res = combineX86ShufflesRecursively( {Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, Subtarget)) + /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return Res; } |

