diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-01-18 20:59:04 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-01-18 20:59:04 +0000 |
commit | 3e5fb61978b7be692ec1d9033ded68eb6f1886fe (patch) | |
tree | 6c2bee79972e1f431510cfefa6c88675674ff6d0 /llvm/lib | |
parent | a94ae1e05ba3aeb57bc750ca24b11e5923d0a597 (diff) | |
download | bcm5719-llvm-3e5fb61978b7be692ec1d9033ded68eb6f1886fe.tar.gz bcm5719-llvm-3e5fb61978b7be692ec1d9033ded68eb6f1886fe.zip |
[X86][AVX2] Broadcast subvectors
AVX2 can only broadcast from the zeroth element of a vector, but if the broadcastable element is the zeroth element of a 128-bit subvector, it's advantageous to extract the subvector and broadcast from that, avoiding the load of shuffle mask data that would be needed for VPERMPS/VPERMD. The only exception is when the source type is v4f64 or v4i64, which can use the immediate shuffles VPERMPD/VPERMQ directly.
Differential Revision: http://reviews.llvm.org/D16050
llvm-svn: 258081
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 24 |
1 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 866d11f62dc..322a0133b29 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8291,10 +8291,28 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); - } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) { - // We can't broadcast from a vector register without AVX2, and we can only - // broadcast from the zero-element of a vector register. + } else if (!Subtarget->hasAVX2()) { + // We can't broadcast from a vector register without AVX2. return SDValue(); + } else if (BroadcastIdx != 0) { + // We can only broadcast from the zero-element of a vector register, + // but it can be advantageous to broadcast from the zero-element of a + // subvector. + if (!VT.is256BitVector() && !VT.is512BitVector()) + return SDValue(); + + // VPERMQ/VPERMPD can perform the cross-lane shuffle directly. + if (VT == MVT::v4f64 || VT == MVT::v4i64) + return SDValue(); + + // Only broadcast the zero-element of a 128-bit subvector. + unsigned EltSize = VT.getScalarSizeInBits(); + if (((BroadcastIdx * EltSize) % 128) != 0) + return SDValue(); + + MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize); + V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V, + DAG.getIntPtrConstant(BroadcastIdx, DL)); } V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V); |