diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 24 |
1 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 866d11f62dc..322a0133b29 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8291,10 +8291,28 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); - } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) { - // We can't broadcast from a vector register without AVX2, and we can only - // broadcast from the zero-element of a vector register. + } else if (!Subtarget->hasAVX2()) { + // We can't broadcast from a vector register without AVX2. return SDValue(); + } else if (BroadcastIdx != 0) { + // We can only broadcast from the zero-element of a vector register, + // but it can be advantageous to broadcast from the zero-element of a + // subvector. + if (!VT.is256BitVector() && !VT.is512BitVector()) + return SDValue(); + + // VPERMQ/VPERMPD can perform the cross-lane shuffle directly. + if (VT == MVT::v4f64 || VT == MVT::v4i64) + return SDValue(); + + // Only broadcast the zero-element of a 128-bit subvector. + unsigned EltSize = VT.getScalarSizeInBits(); + if (((BroadcastIdx * EltSize) % 128) != 0) + return SDValue(); + + MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize); + V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V, + DAG.getIntPtrConstant(BroadcastIdx, DL)); } V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V); |