summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp36
1 files changed, 18 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 17481f7fb26..36d3db44b18 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32033,7 +32033,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
- (RootVT.isFloatingPoint() && Depth >= 2) ||
+ (RootVT.isFloatingPoint() && Depth >= 1) ||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
// Don't combine if we are a AVX512/EVEX target and the mask element size
@@ -32072,7 +32072,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
!(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) &&
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
return SDValue(); // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
unsigned PermMask = 0;
@@ -32117,8 +32117,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
// TODO: Should we indicate which domain is preferred if both are allowed?
- bool AllowFloatDomain = FloatDomain || (Depth > 3);
- bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
+ bool AllowFloatDomain = FloatDomain || (Depth >= 3);
+ bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
// Determine zeroable mask elements.
@@ -32153,14 +32153,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (V1.getValueType() == MaskVT &&
V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
MayFoldLoad(V1.getOperand(0))) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = V1.getOperand(0);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
return DAG.getBitcast(RootVT, Res);
}
if (Subtarget.hasAVX2()) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(MaskVT, V1);
Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res);
@@ -32174,7 +32174,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
+ if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleSrcVT, NewV1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
@@ -32185,7 +32185,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
+ if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
@@ -32200,7 +32200,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
ShuffleVT, UnaryShuffle) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
+ if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1);
NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2);
@@ -32214,7 +32214,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
(!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
+ if (Depth == 0 && Root.getOpcode() == Shuffle)
return SDValue(); // Nothing to do!
NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
@@ -32232,7 +32232,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
uint64_t BitLen, BitIdx;
if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
Zeroable)) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
@@ -32242,7 +32242,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
}
if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+ if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI)
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
V2 = DAG.getBitcast(IntMaskVT, V2);
@@ -32255,11 +32255,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Don't try to re-form single instruction chains under any circumstances now
// that we've done encoding canonicalization for them.
- if (Depth < 2)
+ if (Depth < 1)
return SDValue();
// Depth threshold above which we can efficiently use variable mask shuffles.
- int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3;
+ int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2;
AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask;
bool MaskContainsZeros =
@@ -32741,7 +32741,7 @@ static SDValue combineX86ShufflesRecursively(
// Bound the depth of our recursive combine because this is ultimately
// quadratic in nature.
const unsigned MaxRecursionDepth = 8;
- if (Depth > MaxRecursionDepth)
+ if (Depth >= MaxRecursionDepth)
return SDValue();
// Directly rip through bitcasts to find the underlying operand.
@@ -32944,7 +32944,7 @@ static SDValue combineX86ShufflesRecursively(
/// Helper entry wrapper to combineX86ShufflesRecursively.
static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1,
+ return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0,
/*HasVarMask*/ false,
/*AllowVarMask*/ true, DAG, Subtarget);
}
@@ -33179,7 +33179,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
for (unsigned i = 0; i != Scale; ++i)
DemandedMask[i] = i;
if (SDValue Res = combineX86ShufflesRecursively(
- {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1,
+ {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getBitcast(SrcVT, Res));
@@ -38697,7 +38697,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
if (SDValue Shuffle = combineX86ShufflesRecursively(
- {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
+ {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
N->getOperand(0).getOperand(1));
OpenPOWER on IntegriCloud