diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-02-19 15:57:09 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-02-19 15:57:09 +0000 |
| commit | 952abcefe42837346eee8ee5e4fd3add7388c04a (patch) | |
| tree | 2cbaed6315cd714f0bfc5325e701e8b0396f5b72 /llvm/lib/Target/X86 | |
| parent | baff199877380a0b7269f7949ad7fb8740bab97a (diff) | |
| download | bcm5719-llvm-952abcefe42837346eee8ee5e4fd3add7388c04a.tar.gz bcm5719-llvm-952abcefe42837346eee8ee5e4fd3add7388c04a.zip | |
[X86][AVX] EltsFromConsecutiveLoads - Add BROADCAST lowering support
This patch adds scalar/subvector BROADCAST handling to EltsFromConsecutiveLoads.
It mainly shows codegen changes for 32-bit targets, which previously failed to handle i64 loads; 64-bit code also uses this new path to combine to a broadcast load more efficiently.
Differential Revision: https://reviews.llvm.org/D58053
llvm-svn: 354340
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 73 |
1 file changed, 70 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c688731010e..6c284baecae 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7384,12 +7384,15 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, VT.is256BitVector() && !Subtarget.hasInt256()) return SDValue(); + if (NumElems == 1) + return DAG.getBitcast(VT, Elts[FirstLoadedElt]); + if (IsConsecutiveLoad) return CreateLoad(VT, LDBase); // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded // vector and a zero vector to clear out the zero elements. - if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) { + if (!isAfterLegalize && VT.isVector() && NumElems == VT.getVectorNumElements()) { SmallVector<int, 4> ClearMask(NumElems, -1); for (unsigned i = 0; i < NumElems; ++i) { if (ZeroMask[i]) @@ -7404,8 +7407,23 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, } } - int LoadSize = - (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits(); + unsigned BaseSize = LDBaseVT.getStoreSizeInBits(); + int LoadSize = (1 + LastLoadedElt - FirstLoadedElt) * BaseSize; + + // If the upper half of a ymm/zmm load is undef then just load the lower half. + if (VT.is256BitVector() || VT.is512BitVector()) { + unsigned HalfNumElems = NumElems / 2; + if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) { + EVT HalfVT = + EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems); + SDValue HalfLD = + EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL, + DAG, Subtarget, isAfterLegalize); + if (HalfLD) + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), + HalfLD, DAG.getIntPtrConstant(0, DL)); + } + } // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs. 
if (IsConsecutiveLoad && FirstLoadedElt == 0 && @@ -7428,6 +7446,55 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, } } + // BROADCAST - match the smallest possible repetition pattern, load that + // scalar/subvector element and then broadcast to the entire vector. + if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && + (BaseSize % 8) == 0 && Subtarget.hasAVX() && + (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) { + for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) { + unsigned RepeatSize = SubElems * BaseSize; + unsigned ScalarSize = std::min(RepeatSize, 64u); + if (!Subtarget.hasAVX2() && ScalarSize < 32) + continue; + + bool Match = true; + SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(LDBaseVT)); + for (unsigned i = 0; i != NumElems && Match; ++i) { + if (!LoadMask[i]) + continue; + SDValue Elt = peekThroughBitcasts(Elts[i]); + if (RepeatedLoads[i % SubElems].isUndef()) + RepeatedLoads[i % SubElems] = Elt; + else + Match &= (RepeatedLoads[i % SubElems] == Elt); + } + + // We must have loads at both ends of the repetition. + Match &= !RepeatedLoads.front().isUndef(); + Match &= !RepeatedLoads.back().isUndef(); + if (!Match) + continue; + + EVT RepeatVT = + VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64)) + ? EVT::getIntegerVT(*DAG.getContext(), ScalarSize) + : EVT::getFloatingPointVT(ScalarSize); + if (RepeatSize > ScalarSize) + RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT, + RepeatSize / ScalarSize); + if (SDValue RepeatLoad = EltsFromConsecutiveLoads( + RepeatVT, RepeatedLoads, DL, DAG, Subtarget, isAfterLegalize)) { + EVT BroadcastVT = + EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(), + VT.getSizeInBits() / ScalarSize); + unsigned Opcode = RepeatSize > ScalarSize ? 
X86ISD::SUBV_BROADCAST + : X86ISD::VBROADCAST; + SDValue Broadcast = DAG.getNode(Opcode, DL, BroadcastVT, RepeatLoad); + return DAG.getBitcast(VT, Broadcast); + } + } + } + return SDValue(); } |

