summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2015-03-08 16:28:47 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2015-03-08 16:28:47 +0000
commit6c7d70469cd0ae8026f7e97b73e19478c164dece (patch)
tree23d06f5554ca118287bfd494b9ba46560a9d35c0 /llvm/lib
parent8f3c0cd1bacab79154c84b8891f4afa80925ba2c (diff)
downloadbcm5719-llvm-6c7d70469cd0ae8026f7e97b73e19478c164dece.tar.gz
bcm5719-llvm-6c7d70469cd0ae8026f7e97b73e19478c164dece.zip
[X86][AVX] Fix wrong lowering of VPERM2X128 nodes
There were cases where the backend computed a wrong permute mask for a VPERM2X128 node.

Example:
\code
define <8 x float> @foo(<8 x float> %a, <8 x float> %b) {
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 undef, i32 6, i32 7>
  ret <8 x float> %shuffle
}
\code end

Before this patch, llc (with -mattr=+avx) emitted the following vperm2f128:
  vperm2f128 $0, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[0,1,0,1]

With this patch, llc emits a vperm2f128 with a correct permute mask:
  vperm2f128 $17, %ymm0, %ymm0, %ymm0 # ymm0 = ymm0[2,3,2,3]

Differential Revision: http://reviews.llvm.org/D8119

llvm-svn: 231601
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 10
1 files changed, 9 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 37b8f6addc3..5ff69ba03c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9039,7 +9039,15 @@ static SDValue lowerV2X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
// Otherwise form a 128-bit permutation.
// FIXME: Detect zero-vector inputs and use the VPERM2X128 to zero that half.
- unsigned PermMask = Mask[0] / 2 | (Mask[2] / 2) << 4;
+ int MaskLO = Mask[0];
+ if (MaskLO == SM_SentinelUndef)
+ MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
+
+ int MaskHI = Mask[2];
+ if (MaskHI == SM_SentinelUndef)
+ MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
+
+ unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, MVT::i8));
}
OpenPOWER on IntegriCloud