diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 120 |
1 file changed, 46 insertions, 74 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ed8b3890360..c94fc5ed44b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -27611,11 +27611,11 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, /// into either a single instruction if there is a special purpose instruction /// for this operation, or into a PSHUFB instruction which is a fully general /// instruction but should only be used to replace chains over a certain depth. -static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, - ArrayRef<int> BaseMask, int Depth, - bool HasVariableMask, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { +static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, + ArrayRef<int> BaseMask, int Depth, + bool HasVariableMask, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!"); assert((Inputs.size() == 1 || Inputs.size() == 2) && "Unexpected number of shuffle inputs!"); @@ -27640,9 +27640,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, unsigned NumBaseMaskElts = BaseMask.size(); if (NumBaseMaskElts == 1) { assert(BaseMask[0] == 0 && "Invalid shuffle index found!"); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, V1); } unsigned RootSizeInBits = RootVT.getSizeInBits(); @@ -27660,7 +27658,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, bool IsEVEXShuffle = RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128); if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits)) - return false; + return SDValue(); // TODO - handle 128/256-bit lane shuffles of 512-bit vectors. 
@@ -27669,7 +27667,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 && !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) { if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128) - return false; // Nothing to do! + return SDValue(); // Nothing to do! MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64); unsigned PermMask = 0; PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0); @@ -27681,9 +27679,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DAG.getUNDEF(ShuffleVT), DAG.getConstant(PermMask, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // For masks that have been widened to 128-bit elements or more, @@ -27708,7 +27704,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // Only allow legal mask types. if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) - return false; + return SDValue(); // Attempt to match the mask against known shuffle patterns. MVT ShuffleSrcVT, ShuffleVT; @@ -27736,9 +27732,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale); if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) && isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) { - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, V1); } } @@ -27746,33 +27740,29 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT)) { if (Depth == 1 && Root.getOpcode() == Shuffle) - return false; // Nothing to do! + return SDValue(); // Nothing to do! 
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return false; // AVX512 Writemask clash. + return SDValue(); // AVX512 Writemask clash. Res = DAG.getBitcast(ShuffleSrcVT, V1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) - return false; // Nothing to do! + return SDValue(); // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return false; // AVX512 Writemask clash. + return SDValue(); // AVX512 Writemask clash. Res = DAG.getBitcast(ShuffleVT, V1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, DAG.getConstant(PermuteImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } } @@ -27780,18 +27770,16 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT, UnaryShuffle)) { if (Depth == 1 && Root.getOpcode() == Shuffle) - return false; // Nothing to do! + return SDValue(); // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return false; // AVX512 Writemask clash. + return SDValue(); // AVX512 Writemask clash. 
V1 = DAG.getBitcast(ShuffleVT, V1); DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(ShuffleVT, V2); DCI.AddToWorklist(V2.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, @@ -27799,9 +27787,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) - return false; // Nothing to do! + return SDValue(); // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) - return false; // AVX512 Writemask clash. + return SDValue(); // AVX512 Writemask clash. V1 = DAG.getBitcast(ShuffleVT, V1); DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(ShuffleVT, V2); @@ -27809,9 +27797,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2, DAG.getConstant(PermuteImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // Typically from here on, we need an integer version of MaskVT. @@ -27824,21 +27810,19 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) { if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) - return false; // Nothing to do! + return SDValue(); // Nothing to do! 
V1 = DAG.getBitcast(IntMaskVT, V1); DCI.AddToWorklist(V1.getNode()); Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, DAG.getConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitIdx, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) - return false; // Nothing to do! + return SDValue(); // Nothing to do! V1 = DAG.getBitcast(IntMaskVT, V1); DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(IntMaskVT, V2); @@ -27847,16 +27831,14 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DAG.getConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitIdx, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } } // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. 
if (Depth < 2) - return false; + return SDValue(); bool MaskContainsZeros = any_of(Mask, [](int M) { return M == SM_SentinelZero; }); @@ -27879,9 +27861,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero @@ -27910,9 +27890,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DCI.AddToWorklist(Zero.getNode()); Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // If we have a dual input lane-crossing shuffle then lower to VPERMV3. @@ -27935,11 +27913,9 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DCI.AddToWorklist(V2.getNode()); Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } - return false; + return SDValue(); } // See if we can combine a single input shuffle with zeros to a bit-mask, @@ -27969,9 +27945,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, FloatDomain ? 
unsigned(X86ISD::FAND) : unsigned(ISD::AND); Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // If we have a single input shuffle with different shuffle patterns in the @@ -27992,9 +27966,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // With XOP, binary shuffles of 128/256-bit floating point vectors can combine @@ -28033,9 +28005,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, DAG.getConstant(M2ZImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // If we have 3 or more shuffle instructions or a chain involving a variable @@ -28071,9 +28041,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DCI.AddToWorklist(PSHUFBMaskOp.getNode()); Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // With XOP, if we have a 128-bit binary input shuffle we can always combine @@ -28109,13 +28077,11 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DCI.AddToWorklist(VPPERMMaskOp.getNode()); Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp); DCI.AddToWorklist(Res.getNode()); - DCI.CombineTo(Root.getNode(), 
DAG.getBitcast(RootVT, Res), - /*AddTo*/ true); - return true; + return DAG.getBitcast(RootVT, Res); } // Failed to find any combines. - return false; + return SDValue(); } // Attempt to constant fold all of the constant source ops. @@ -28434,8 +28400,14 @@ static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps, std::swap(Ops[0], Ops[1]); } - return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG, - DCI, Subtarget); + // Finally, try to combine into a single shuffle instruction. + SDValue Res = combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG, + DCI, Subtarget); + if (Res) { + DCI.CombineTo(Root.getNode(), Res, /*AddTo*/ true); + return true; + } + return false; } /// \brief Get the PSHUF-style mask from PSHUF node. |

