diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 36 |
1 files changed, 18 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 17481f7fb26..36d3db44b18 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32033,7 +32033,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, unsigned NumRootElts = RootVT.getVectorNumElements(); unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() || - (RootVT.isFloatingPoint() && Depth >= 2) || + (RootVT.isFloatingPoint() && Depth >= 1) || (RootVT.is256BitVector() && !Subtarget.hasAVX2()); // Don't combine if we are a AVX512/EVEX target and the mask element size @@ -32072,7 +32072,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 && !(Subtarget.hasAVX2() && BaseMask[0] >= -1 && BaseMask[1] >= -1) && !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128) + if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128) return SDValue(); // Nothing to do! MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64); unsigned PermMask = 0; @@ -32117,8 +32117,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // Which shuffle domains are permitted? // Permit domain crossing at higher combine depths. // TODO: Should we indicate which domain is preferred if both are allowed? - bool AllowFloatDomain = FloatDomain || (Depth > 3); - bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() && + bool AllowFloatDomain = FloatDomain || (Depth >= 3); + bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() && (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); // Determine zeroable mask elements. @@ -32153,14 +32153,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, if (V1.getValueType() == MaskVT && V1.getOpcode() == ISD::SCALAR_TO_VECTOR && MayFoldLoad(V1.getOperand(0))) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST) + if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! Res = V1.getOperand(0); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); return DAG.getBitcast(RootVT, Res); } if (Subtarget.hasAVX2()) { - if (Depth == 1 && Root.getOpcode() == X86ISD::VBROADCAST) + if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) return SDValue(); // Nothing to do! Res = DAG.getBitcast(MaskVT, V1); Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); @@ -32174,7 +32174,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleSrcVT, NewV1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); @@ -32185,7 +32185,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! Res = DAG.getBitcast(ShuffleVT, V1); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, @@ -32200,7 +32200,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT, UnaryShuffle) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); @@ -32214,7 +32214,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { - if (Depth == 1 && Root.getOpcode() == Shuffle) + if (Depth == 0 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! NewV1 = DAG.getBitcast(ShuffleVT, NewV1); NewV2 = DAG.getBitcast(ShuffleVT, NewV2); @@ -32232,7 +32232,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, uint64_t BitLen, BitIdx; if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) + if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI) return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, @@ -32242,7 +32242,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { - if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) + if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI) return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); V2 = DAG.getBitcast(IntMaskVT, V2); @@ -32255,11 +32255,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. - if (Depth < 2) + if (Depth < 1) return SDValue(); // Depth threshold above which we can efficiently use variable mask shuffles. - int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 2 : 3; + int VariableShuffleDepth = Subtarget.hasFastVariableShuffle() ? 1 : 2; AllowVariableMask &= (Depth >= VariableShuffleDepth) || HasVariableMask; bool MaskContainsZeros = @@ -32741,7 +32741,7 @@ static SDValue combineX86ShufflesRecursively( // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. const unsigned MaxRecursionDepth = 8; - if (Depth > MaxRecursionDepth) + if (Depth >= MaxRecursionDepth) return SDValue(); // Directly rip through bitcasts to find the underlying operand. @@ -32944,7 +32944,7 @@ static SDValue combineX86ShufflesRecursively( /// Helper entry wrapper to combineX86ShufflesRecursively. static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, + return combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 0, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget); } @@ -33179,7 +33179,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, for (unsigned i = 0; i != Scale; ++i) DemandedMask[i] = i; if (SDValue Res = combineX86ShufflesRecursively( - {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 1, + {BC}, 0, BC, DemandedMask, {}, /*Depth*/ 0, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(X86ISD::VBROADCAST, DL, VT, DAG.getBitcast(SrcVT, Res)); @@ -38697,7 +38697,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, } if (SDValue Shuffle = combineX86ShufflesRecursively( - {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2, + {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1, /*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget)) return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, N->getOperand(0).getOperand(1)); |

