summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-01-18 20:59:04 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-01-18 20:59:04 +0000
commit3e5fb61978b7be692ec1d9033ded68eb6f1886fe (patch)
tree6c2bee79972e1f431510cfefa6c88675674ff6d0 /llvm/lib
parenta94ae1e05ba3aeb57bc750ca24b11e5923d0a597 (diff)
downloadbcm5719-llvm-3e5fb61978b7be692ec1d9033ded68eb6f1886fe.tar.gz
bcm5719-llvm-3e5fb61978b7be692ec1d9033ded68eb6f1886fe.zip
[X86][AVX2] Broadcast subvectors
AVX2 can only broadcast from the zero'th element of a vector, but if the broadcastable element is the zero'th element of a 128-bit subvector its advantageous to extract the subvector, broadcast from that and avoid the loading of shuffle mask data that would be needed for VPERMPS/VPERMD. The only exception being when the source type is 4f64 or 4i64 which can directly use the immediate shuffle VPERMPD/VPERMQ directly. Differential Revision: http://reviews.llvm.org/D16050 llvm-svn: 258081
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp24
1 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 866d11f62dc..322a0133b29 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8291,10 +8291,28 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
- } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
- // We can't broadcast from a vector register without AVX2, and we can only
- // broadcast from the zero-element of a vector register.
+ } else if (!Subtarget->hasAVX2()) {
+ // We can't broadcast from a vector register without AVX2.
return SDValue();
+ } else if (BroadcastIdx != 0) {
+ // We can only broadcast from the zero-element of a vector register,
+ // but it can be advantageous to broadcast from the zero-element of a
+ // subvector.
+ if (!VT.is256BitVector() && !VT.is512BitVector())
+ return SDValue();
+
+ // VPERMQ/VPERMPD can perform the cross-lane shuffle directly.
+ if (VT == MVT::v4f64 || VT == MVT::v4i64)
+ return SDValue();
+
+ // Only broadcast the zero-element of a 128-bit subvector.
+ unsigned EltSize = VT.getScalarSizeInBits();
+ if (((BroadcastIdx * EltSize) % 128) != 0)
+ return SDValue();
+
+ MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize);
+ V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V,
+ DAG.getIntPtrConstant(BroadcastIdx, DL));
}
V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V);
OpenPOWER on IntegriCloud