author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-03-27 10:25:02 +0000
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-03-27 10:25:02 +0000
commit | ccb71b2985a2a5073cedd1d734fb0fc2712d68f0 (patch)
tree | ceece4e12f9af39814a7e1b5b2b0a380de5495c4 /llvm/lib/Target
parent | ab0f18076b11972429e7d04cb818582f949f03b5 (diff)
Revert rL356864 : [X86][SSE41] Start shuffle combining from ZERO_EXTEND_VECTOR_INREG (PR40685)
Enable SSE41 ZERO_EXTEND_VECTOR_INREG shuffle combines - for the PMOVZX(PSHUFD(V)) -> UNPCKH(V,0) pattern we reduce the number of shuffles (shuffles are a port5 bottleneck on Intel) at the expense of creating a zero (pxor v,v) and an extra register move. This is a good trade-off, as both are cheap and in most cases it doesn't increase register pressure.
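To make the pattern concrete, here is a minimal intrinsics sketch (function names are assumed for illustration; this is not code from the commit). Both routines zero-extend the upper four u16 lanes of a 128-bit vector to u32, but the unpack form issues one shuffle uop instead of two, and the pxor zero idiom is essentially free on recent Intel cores:

```cpp
#include <immintrin.h>

// PMOVZX(PSHUFD(V)): two shuffle-port uops.
__m128i zext_hi_u16_shuffle(__m128i v) {
  __m128i hi = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 2, 3, 2)); // pshufd: move high 64 bits down
  return _mm_cvtepu16_epi32(hi);                              // pmovzxwd: u16 -> u32
}

// UNPCKH(V, 0): one shuffle uop plus a dependency-breaking pxor.
__m128i zext_hi_u16_unpack(__m128i v) {
  return _mm_unpackhi_epi16(v, _mm_setzero_si128()); // interleave high words with zeros
}
```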
This also exposed a missed opportunity to combine to ZERO_EXTEND_VECTOR_INREG with folded loads, even when we're in the float domain (a load-folding sketch follows this message).
........
Causes PR41249
llvm-svn: 357057
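On the folded-load point: the PMOVZX* instructions accept a memory operand, so when the source value comes straight from a load, the zero-extend can fold the load and no separate domain-crossing move is needed. A minimal sketch under the same assumptions as above (hypothetical helper name, not commit code):

```cpp
#include <immintrin.h>

// With SSE4.1 enabled, compilers typically fold the 64-bit load into the
// extension, emitting a single pmovzxdq (%rdi), %xmm0 instead of movq + unpack.
__m128i zext_load_u32x2(const void *p) {
  __m128i lo = _mm_loadl_epi64(static_cast<const __m128i *>(p)); // movq
  return _mm_cvtepu32_epi64(lo);                                 // pmovzxdq
}
```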
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 61
1 file changed, 28 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1bf029ac887..67631f72be4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30885,39 +30885,33 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
 
   // Match against a ZERO_EXTEND_VECTOR_INREG/VZEXT instruction.
   // TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
-  if ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
-      (MaskVT.is256BitVector() && Subtarget.hasInt256())) {
-    // Allow this with FloatDomain if we'll be able to fold the load.
-    SDValue BC1 = peekThroughOneUseBitcasts(V1);
-    if (AllowIntDomain ||
-        (BC1.hasOneUse() && BC1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-         MayFoldLoad(BC1.getOperand(0)))) {
-      unsigned MaxScale = 64 / MaskEltSize;
-      for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
-        bool Match = true;
-        unsigned NumDstElts = NumMaskElts / Scale;
-        for (unsigned i = 0; i != NumDstElts && Match; ++i) {
-          Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
-          Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
-        }
-        if (Match) {
-          unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
-          MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType()
-                                            : MVT::getIntegerVT(MaskEltSize);
-          SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
-
-          if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
-            V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
-
-          if (SrcVT.getVectorNumElements() == NumDstElts)
-            Shuffle = unsigned(ISD::ZERO_EXTEND);
-          else
-            Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
+  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
+                         (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
+    unsigned MaxScale = 64 / MaskEltSize;
+    for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
+      bool Match = true;
+      unsigned NumDstElts = NumMaskElts / Scale;
+      for (unsigned i = 0; i != NumDstElts && Match; ++i) {
+        Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
+        Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
+      }
+      if (Match) {
+        unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
+        MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
+                       MVT::getIntegerVT(MaskEltSize);
+        SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
+
+        if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
+          V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);
+
+        if (SrcVT.getVectorNumElements() == NumDstElts)
+          Shuffle = unsigned(ISD::ZERO_EXTEND);
+        else
+          Shuffle = unsigned(ISD::ZERO_EXTEND_VECTOR_INREG);
 
-          DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
-          DstVT = MVT::getVectorVT(DstVT, NumDstElts);
-          return true;
-        }
+        DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
+        DstVT = MVT::getVectorVT(DstVT, NumDstElts);
+        return true;
       }
     }
   }
@@ -42622,7 +42616,8 @@ static SDValue combineExtInVec(SDNode *N, SelectionDAG &DAG,
       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, In.getOperand(0));
 
   // Attempt to combine as a shuffle.
-  if (Subtarget.hasSSE41() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+  // TODO: SSE41 support
+  if (Subtarget.hasAVX() && N->getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
     SDValue Op(N, 0);
     if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
       if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
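For reference, the matching loop in the first hunk accepts a shuffle mask as a zero-extension by Scale when destination lane i*Scale selects source element i (or is undef) and the following Scale-1 padding lanes are undef or known zero. A simplified standalone re-implementation (not the LLVM code itself) using LLVM's shuffle-mask sentinel values:

```cpp
#include <cstdio>
#include <vector>

// Sentinel values as used in LLVM shuffle masks.
constexpr int SM_SentinelUndef = -1;
constexpr int SM_SentinelZero = -2;

// A mask models ZERO_EXTEND_VECTOR_INREG by Scale iff lane i*Scale picks
// source element i (or is undef) and the Scale-1 padding lanes that follow
// are undef or known zero.
static bool matchesZExtInReg(const std::vector<int> &Mask, unsigned Scale) {
  size_t NumDstElts = Mask.size() / Scale;
  for (size_t i = 0; i != NumDstElts; ++i) {
    if (Mask[i * Scale] != SM_SentinelUndef && Mask[i * Scale] != (int)i)
      return false; // lane must be undef or source element i
    for (unsigned j = 1; j != Scale; ++j)
      if (Mask[i * Scale + j] >= 0)
        return false; // padding lanes must be undef or zero
  }
  return true;
}

int main() {
  std::vector<int> PMovZxDQ = {0, SM_SentinelZero, 1, SM_SentinelZero};
  std::printf("{0,Z,1,Z} at Scale 2: %d\n", matchesZExtInReg(PMovZxDQ, 2)); // 1
  std::vector<int> Blend = {0, 5, 2, 7};
  std::printf("{0,5,2,7} at Scale 2: %d\n", matchesZExtInReg(Blend, 2));    // 0
}
```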