summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 8
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 5
-rw-r--r-- llvm/test/CodeGen/X86/avx-vperm2x128.ll | 20
3 files changed, 18 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 9777c0d85e9..b1c69e779f3 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -255,15 +255,13 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
- if (Imm & 0x88)
- return; // Not a shuffle
-
unsigned HalfSize = VT.getVectorNumElements() / 2;
for (unsigned l = 0; l != 2; ++l) {
- unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize;
+ unsigned HalfMask = Imm >> (l * 4);
+ unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
- ShuffleMask.push_back(i);
+ ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : i);
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a92ab5ae2a0..5ad31b742a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4390,6 +4390,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// IsUnary to true if only uses one source. Note that this will set IsUnary for
/// shuffles which use a single input multiple times, and in those cases it will
/// adjust the mask to only have indices within that single input.
+/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
static bool getTargetShuffleMask(SDNode *N, MVT VT,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
@@ -4519,6 +4520,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
if (Mask.empty()) return false;
+ // Mask only contains negative index if an element is zero.
+ if (std::any_of(Mask.begin(), Mask.end(),
+ [](int M){ return M == SM_SentinelZero; }))
+ return false;
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
index 74d20f348b5..4e43f6f5192 100644
--- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -269,7 +269,7 @@ entry:
define <4 x double> @vperm2z_0x08(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x08:
; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x double> %s
@@ -279,7 +279,7 @@ define <4 x double> @vperm2z_0x18(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x18:
; ALL: # BB#0:
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
ret <4 x double> %s
@@ -288,7 +288,7 @@ define <4 x double> @vperm2z_0x18(<4 x double> %a) {
define <4 x double> @vperm2z_0x28(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x28:
; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 $40, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %s
@@ -298,7 +298,7 @@ define <4 x double> @vperm2z_0x38(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x38:
; ALL: # BB#0:
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
-; ALL-NEXT: vblendpd $12, %ymm0, %ymm1, %ymm0
+; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x double> %s
@@ -307,7 +307,7 @@ define <4 x double> @vperm2z_0x38(<4 x double> %a) {
define <4 x double> @vperm2z_0x80(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x80:
; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x double> %s
@@ -316,7 +316,7 @@ define <4 x double> @vperm2z_0x80(<4 x double> %a) {
define <4 x double> @vperm2z_0x81(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x81:
; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT: retq
%s = shufflevector <4 x double> %a, <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x double> %s
@@ -325,7 +325,7 @@ define <4 x double> @vperm2z_0x81(<4 x double> %a) {
define <4 x double> @vperm2z_0x82(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x82:
; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 $128, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[0,1],zero,zero
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x double> %s
@@ -334,7 +334,7 @@ define <4 x double> @vperm2z_0x82(<4 x double> %a) {
define <4 x double> @vperm2z_0x83(<4 x double> %a) {
; ALL-LABEL: vperm2z_0x83:
; ALL: # BB#0:
-; ALL-NEXT: vperm2f128 $129, %ymm0, %ymm0, %ymm0
+; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; ALL-NEXT: retq
%s = shufflevector <4 x double> <double 0.0, double 0.0, double undef, double undef>, <4 x double> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x double> %s
@@ -345,8 +345,8 @@ define <4 x double> @vperm2z_0x83(<4 x double> %a) {
define <4 x i64> @vperm2z_int_0x83(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: vperm2z_int_0x83:
; ALL: # BB#0:
-; AVX1: vperm2f128 $129, %ymm0, %ymm0, %ymm0
-; AVX2: vperm2i128 $129, %ymm0, %ymm0, %ymm0
+; AVX1: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
+; AVX2: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
%s = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
%c = add <4 x i64> %b, %s
ret <4 x i64> %c
OpenPOWER on IntegriCloud