diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2015-03-08 16:28:47 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2015-03-08 16:28:47 +0000 |
commit | 6c7d70469cd0ae8026f7e97b73e19478c164dece (patch) | |
tree | 23d06f5554ca118287bfd494b9ba46560a9d35c0 /llvm/lib | |
parent | 8f3c0cd1bacab79154c84b8891f4afa80925ba2c (diff) | |
download | bcm5719-llvm-6c7d70469cd0ae8026f7e97b73e19478c164dece.tar.gz bcm5719-llvm-6c7d70469cd0ae8026f7e97b73e19478c164dece.zip |
[X86][AVX] Fix wrong lowering of VPERM2X128 nodes
There were cases where the backend computed a wrong permute mask for a VPERM2X128 node.
Example:
\code
define <8 x float> @foo(<8 x float> %a, <8 x float> %b) {
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
ret <8 x float> %shuffle
}
\code end
Before this patch, llc (with -mattr=+avx) emitted the following vperm2f128:
vperm2f128 $0, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[0,1,0,1]
With this patch, llc emits a vperm2f128 with a correct permute mask:
vperm2f128 $17, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[2,3,2,3]
Differential Revision: http://reviews.llvm.org/D8119
llvm-svn: 231601
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 |
1 files changed, 9 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 37b8f6addc3..5ff69ba03c6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9039,7 +9039,15 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1, // Otherwise form a 128-bit permutation. // FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half. - unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4; + int MaskLO = Mask[0]; + if (MaskLO == SM_SentinelUndef) + MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1]; + + int MaskHI = Mask[2]; + if (MaskHI == SM_SentinelUndef) + MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3]; + + unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4; return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, DAG.getConstant(PermMask, MVT::i8)); } |