summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 49
1 files changed, 29 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 082d1bafddb..14e54f0e4e2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29652,7 +29652,8 @@ static bool matchBinaryPermuteVectorShuffle(
/// instruction but should only be used to replace chains over a certain depth.
static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ArrayRef<int> BaseMask, int Depth,
- bool HasVariableMask, SelectionDAG &DAG,
+ bool HasVariableMask,
+ bool AllowVariableMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
assert((Inputs.size() == 1 || Inputs.size() == 2) &&
@@ -29865,7 +29866,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Depth threshold above which we can efficiently use variable mask shuffles.
int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
- bool AllowVariableMask = (Depth >= VariableShuffleDepth) || HasVariableMask;
+ AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
@@ -30199,7 +30200,8 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
static SDValue combineX86ShufflesRecursively(
ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth,
- bool HasVariableMask, SelectionDAG &DAG, const X86Subtarget &Subtarget) {
+ bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
const unsigned MaxRecursionDepth = 8;
@@ -30354,18 +30356,23 @@ static SDValue combineX86ShufflesRecursively(
CombinedNodes.push_back(Op.getNode());
// See if we can recurse into each shuffle source op (if it's a target
- // shuffle). The source op should only be combined if it either has a
- // single use (i.e. current Op) or all its users have already been combined.
+ // shuffle). The source op should only be generally combined if it either has
+ // a single use (i.e. current Op) or all its users have already been combined;
+ // if not, we can still combine but should prevent generation of variable
+ // shuffles to avoid constant pool bloat.
// Don't recurse if we already have more source ops than we can combine in
// the remaining recursion depth.
if (Ops.size() < (MaxRecursionDepth - Depth)) {
- for (int i = 0, e = Ops.size(); i < e; ++i)
+ for (int i = 0, e = Ops.size(); i < e; ++i) {
+ bool AllowVar = false;
if (Ops[i].getNode()->hasOneUse() ||
SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode()))
- if (SDValue Res = combineX86ShufflesRecursively(
- Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
- DAG, Subtarget))
- return Res;
+ AllowVar = AllowVariableMask;
+ if (SDValue Res = combineX86ShufflesRecursively(
+ Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask,
+ AllowVar, DAG, Subtarget))
+ return Res;
+ }
}
// Attempt to constant fold all of the constant source ops.
@@ -30395,8 +30402,8 @@ static SDValue combineX86ShufflesRecursively(
}
// Finally, try to combine into a single shuffle instruction.
- return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
- Subtarget);
+ return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask,
+ AllowVariableMask, DAG, Subtarget);
}
/// Get the PSHUF-style mask from PSHUF node.
@@ -30697,7 +30704,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
{BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
}
@@ -31316,7 +31323,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// a particular chain.
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -34223,7 +34230,8 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res =
combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
return SDValue();
@@ -34283,7 +34291,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -34322,7 +34330,8 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res =
combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false,
+ /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
return SDValue();
@@ -34848,7 +34857,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
@@ -34885,7 +34894,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue Shuffle = combineX86ShufflesRecursively(
{SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
N->getOperand(0).getOperand(1));
}
@@ -37419,7 +37428,7 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
SDValue Op(N, 0);
if (SDValue Res = combineX86ShufflesRecursively(
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
- /*HasVarMask*/ false, DAG, Subtarget))
+ /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;
}
OpenPOWER on IntegriCloud