author     Simon Pilgrim <llvm-dev@redking.me.uk>    2019-02-19 15:57:09 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>    2019-02-19 15:57:09 +0000
commit     952abcefe42837346eee8ee5e4fd3add7388c04a (patch)
tree       2cbaed6315cd714f0bfc5325e701e8b0396f5b72 /llvm/lib/Target
parent     baff199877380a0b7269f7949ad7fb8740bab97a (diff)
[X86][AVX] EltsFromConsecutiveLoads - Add BROADCAST lowering support
This patch adds scalar/subvector BROADCAST handling to EltsFromConsecutiveLoads.

It mainly shows codegen changes to 32-bit code, which previously failed to handle i64 loads, although 64-bit code also uses this new path to combine to a broadcast load more efficiently.

Differential Revision: https://reviews.llvm.org/D58053

llvm-svn: 354340
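As a rough illustration of the repetition matching that the new BROADCAST path performs, here is a minimal standalone sketch. It is plain C++, not the LLVM API: findRepeatPeriod, the std::optional<int> stand-in for possibly-undef elements, and the example vector are all illustrative assumptions mirroring the SubElems loop in the patch below.

#include <cstdio>
#include <optional>
#include <vector>

// Sketch of the repetition matching: find the smallest power-of-two period
// SubElems such that every defined element equals the element at the same
// position modulo SubElems. std::nullopt stands for an undef element.
static unsigned findRepeatPeriod(const std::vector<std::optional<int>> &Elts) {
  unsigned NumElems = (unsigned)Elts.size();
  for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
    std::vector<std::optional<int>> Repeated(SubElems);
    bool Match = true;
    for (unsigned i = 0; i != NumElems && Match; ++i) {
      if (!Elts[i])
        continue; // an undef slot matches anything
      if (!Repeated[i % SubElems])
        Repeated[i % SubElems] = Elts[i];
      else
        Match &= (*Repeated[i % SubElems] == *Elts[i]);
    }
    // As in the patch, require defined loads at both ends of the pattern.
    Match &= Repeated.front().has_value() && Repeated.back().has_value();
    if (Match)
      return SubElems;
  }
  return 0; // no proper repetition -> no broadcast opportunity
}

int main() {
  // <a, b, undef, b> still repeats with period 2, so a 2-element subvector
  // load plus broadcast would cover the whole vector.
  std::vector<std::optional<int>> V = {1, 2, std::nullopt, 2};
  std::printf("period = %u\n", findRepeatPeriod(V)); // prints: period = 2
}

In the patch itself the same idea runs over SDValue loads, with additional AVX/AVX2 legality checks on the size of the repeated element before the broadcast node is built.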
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 73
1 file changed, 70 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c688731010e..6c284baecae 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7384,12 +7384,15 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
       VT.is256BitVector() && !Subtarget.hasInt256())
     return SDValue();
 
+  if (NumElems == 1)
+    return DAG.getBitcast(VT, Elts[FirstLoadedElt]);
+
   if (IsConsecutiveLoad)
     return CreateLoad(VT, LDBase);
 
   // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
   // vector and a zero vector to clear out the zero elements.
-  if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
+  if (!isAfterLegalize && VT.isVector() && NumElems == VT.getVectorNumElements()) {
     SmallVector<int, 4> ClearMask(NumElems, -1);
     for (unsigned i = 0; i < NumElems; ++i) {
       if (ZeroMask[i])
@@ -7404,8 +7407,23 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
     }
   }
 
-  int LoadSize =
-      (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
+  unsigned BaseSize = LDBaseVT.getStoreSizeInBits();
+  int LoadSize = (1 + LastLoadedElt - FirstLoadedElt) * BaseSize;
+
+  // If the upper half of a ymm/zmm load is undef then just load the lower half.
+  if (VT.is256BitVector() || VT.is512BitVector()) {
+    unsigned HalfNumElems = NumElems / 2;
+    if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) {
+      EVT HalfVT =
+          EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
+      SDValue HalfLD =
+          EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL,
+                                   DAG, Subtarget, isAfterLegalize);
+      if (HalfLD)
+        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+                           HalfLD, DAG.getIntPtrConstant(0, DL));
+    }
+  }
 
   // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
   if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
@@ -7428,6 +7446,55 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
     }
   }
 
+  // BROADCAST - match the smallest possible repetition pattern, load that
+  // scalar/subvector element and then broadcast to the entire vector.
+  if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) &&
+      (BaseSize % 8) == 0 && Subtarget.hasAVX() &&
+      (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
+    for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
+      unsigned RepeatSize = SubElems * BaseSize;
+      unsigned ScalarSize = std::min(RepeatSize, 64u);
+      if (!Subtarget.hasAVX2() && ScalarSize < 32)
+        continue;
+
+      bool Match = true;
+      SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(LDBaseVT));
+      for (unsigned i = 0; i != NumElems && Match; ++i) {
+        if (!LoadMask[i])
+          continue;
+        SDValue Elt = peekThroughBitcasts(Elts[i]);
+        if (RepeatedLoads[i % SubElems].isUndef())
+          RepeatedLoads[i % SubElems] = Elt;
+        else
+          Match &= (RepeatedLoads[i % SubElems] == Elt);
+      }
+
+      // We must have loads at both ends of the repetition.
+      Match &= !RepeatedLoads.front().isUndef();
+      Match &= !RepeatedLoads.back().isUndef();
+      if (!Match)
+        continue;
+
+      EVT RepeatVT =
+          VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64))
+              ? EVT::getIntegerVT(*DAG.getContext(), ScalarSize)
+              : EVT::getFloatingPointVT(ScalarSize);
+      if (RepeatSize > ScalarSize)
+        RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT,
+                                    RepeatSize / ScalarSize);
+      if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
+              RepeatVT, RepeatedLoads, DL, DAG, Subtarget, isAfterLegalize)) {
+        EVT BroadcastVT =
+            EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(),
+                             VT.getSizeInBits() / ScalarSize);
+        unsigned Opcode = RepeatSize > ScalarSize ? X86ISD::SUBV_BROADCAST
+                                                  : X86ISD::VBROADCAST;
+        SDValue Broadcast = DAG.getNode(Opcode, DL, BroadcastVT, RepeatLoad);
+        return DAG.getBitcast(VT, Broadcast);
+      }
+    }
+  }
+
   return SDValue();
 }