-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp                  | 18
-rw-r--r--  llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll | 15
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll    |  3
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll     | 61
4 files changed, 59 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d976c57ac0f..8856532ceed 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24602,23 +24602,27 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
}

// Attempt to blend with zero.
- if (VT.getVectorNumElements() <= 8 &&
+ if (NumMaskElts <= 8 &&
((Subtarget.hasSSE41() && VT.is128BitVector()) ||
(Subtarget.hasAVX() && VT.is256BitVector()))) {
// Convert VT to a type compatible with X86ISD::BLENDI.
// TODO - add 16i16 support (requires lane duplication).
- MVT ShuffleVT = VT;
+ bool FloatDomain = VT.isFloatingPoint();
+ MVT ShuffleVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
+ : MVT::getIntegerVT(MaskEltSizeInBits);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, NumMaskElts);
+
if (Subtarget.hasAVX2()) {
- if (VT == MVT::v4i64)
+ if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v8i32;
- else if (VT == MVT::v2i64)
+ else if (ShuffleVT == MVT::v2i64)
ShuffleVT = MVT::v4i32;
} else {
- if (VT == MVT::v2i64 || VT == MVT::v4i32)
+ if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
ShuffleVT = MVT::v8i16;
- else if (VT == MVT::v4i64)
+ else if (ShuffleVT == MVT::v4i64)
ShuffleVT = MVT::v4f64;
- else if (VT == MVT::v8i32)
+ else if (ShuffleVT == MVT::v8i32)
ShuffleVT = MVT::v8f32;
}
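For readers outside the LLVM tree, the type-selection step above can be modeled in isolation: the candidate blend type is now derived from the shuffle mask geometry (NumMaskElts elements of MaskEltSizeInBits bits) rather than from the root value type VT, and is then remapped to something X86ISD::BLENDI can actually handle on the given subtarget. Below is a minimal self-contained sketch of that remapping, assuming a simplified stand-in for llvm::MVT; the names SimpleVT and pickBlendVT are illustrative, not LLVM API.

// Standalone sketch of the patched BLENDI type selection (hypothetical
// helper names; mirrors the diff's logic, not the real llvm::MVT API).
#include <cstdio>
#include <string>

// Simplified stand-in for llvm::MVT: element width, element count, domain.
struct SimpleVT {
  unsigned EltBits;
  unsigned NumElts;
  bool IsFP;
  bool operator==(const SimpleVT &O) const {
    return EltBits == O.EltBits && NumElts == O.NumElts && IsFP == O.IsFP;
  }
  std::string name() const {
    return "v" + std::to_string(NumElts) + (IsFP ? "f" : "i") +
           std::to_string(EltBits);
  }
};

SimpleVT pickBlendVT(bool FloatDomain, unsigned MaskEltSizeInBits,
                     unsigned NumMaskElts, bool HasAVX2) {
  // Build the candidate type from the mask geometry, as in the patch.
  SimpleVT VT{MaskEltSizeInBits, NumMaskElts, FloatDomain};
  const SimpleVT v2i64{64, 2, false}, v4i64{64, 4, false};
  const SimpleVT v4i32{32, 4, false}, v8i32{32, 8, false};
  if (HasAVX2) {
    // AVX2 provides VPBLENDD; widen 64-bit integer blends to 32-bit lanes.
    if (VT == v4i64) return {32, 8, false}; // v4i64 -> v8i32
    if (VT == v2i64) return {32, 4, false}; // v2i64 -> v4i32
  } else {
    // Pre-AVX2, 128-bit integer blends go through PBLENDW (16-bit lanes),
    // and 256-bit blends only exist in the FP domain (VBLENDPD/VBLENDPS).
    if (VT == v2i64 || VT == v4i32) return {16, 8, false}; // -> v8i16
    if (VT == v4i64) return {64, 4, true};                 // -> v4f64
    if (VT == v8i32) return {32, 8, true};                 // -> v8f32
  }
  return VT;
}

int main() {
  // An 8 x 16-bit integer mask on SSE4.1 stays v8i16:
  std::printf("%s\n", pickBlendVT(false, 16, 8, false).name().c_str());
  // A 2 x 64-bit integer mask on AVX2 becomes v4i32 (VPBLENDD):
  std::printf("%s\n", pickBlendVT(false, 64, 2, true).name().c_str());
}

This is why the key change from VT to NumMaskElts/MaskEltSizeInBits matters: a mask whose element width differs from the root VT's (for example, a 16-bit-element mask over a v4i32 root, as in the tests below) can now reach the blend-with-zero combine.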
diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index 039f5cd89ef..e05451b8027 100644
--- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -347,16 +347,11 @@ define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE-NEXT: pinsrw $7, %eax, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: _clearupper4xi32b:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper4xi32b:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[4,5],zero,zero,xmm0[8,9],zero,zero,xmm0[12,13],zero,zero
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper4xi32b:
+; AVX: # BB#0:
+; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX-NEXT: retq
%x16 = bitcast <4 x i32> %0 to <8 x i16>
%r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
%r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index 8ad8e136587..f39aa4e93e7 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -107,7 +107,8 @@ define <16 x i8> @combine_vpperm_identity_bitcast(<16 x i8> %a0, <16 x i8> %a1)
define <16 x i8> @combine_vpperm_as_blend_with_zero(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: combine_vpperm_as_blend_with_zero:
; CHECK: # BB#0:
-; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4,5,6,7]
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 0, i8 1, i8 128, i8 129, i8 4, i8 5, i8 6, i8 7, i8 130, i8 131, i8 132, i8 133, i8 134, i8 135, i8 136, i8 137>)
ret <16 x i8> %res0
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
index 6dbb8f62f6f..7dfbd565223 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
@@ -1182,26 +1182,47 @@ define <8 x i16> @var_shuffle_v8i16_v8i16_xyxyxy00_i16(<8 x i16> %x, <8 x i16> %
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
; SSE41-NEXT: retq
;
-; AVX-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
-; AVX: # BB#0:
-; AVX-NEXT: movswq %di, %r10
-; AVX-NEXT: movswq %si, %r11
-; AVX-NEXT: movswq %dx, %rdx
-; AVX-NEXT: movswq %cx, %rcx
-; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: movswq %r8w, %rdi
-; AVX-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
-; AVX-NEXT: movswq %r9w, %rax
-; AVX-NEXT: movzwl -40(%rsp,%r10,2), %esi
-; AVX-NEXT: vmovd %esi, %xmm0
-; AVX-NEXT: vpinsrw $1, -24(%rsp,%r11,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrw $4, -40(%rsp,%rdi,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
-; AVX-NEXT: retq
+; AVX1-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
+; AVX1: # BB#0:
+; AVX1-NEXT: movswq %di, %r10
+; AVX1-NEXT: movswq %si, %r11
+; AVX1-NEXT: movswq %dx, %rdx
+; AVX1-NEXT: movswq %cx, %rcx
+; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movswq %r8w, %rdi
+; AVX1-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; AVX1-NEXT: movswq %r9w, %rax
+; AVX1-NEXT: movzwl -40(%rsp,%r10,2), %esi
+; AVX1-NEXT: vmovd %esi, %xmm0
+; AVX1-NEXT: vpinsrw $1, -24(%rsp,%r11,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $4, -40(%rsp,%rdi,2), %xmm0, %xmm0
+; AVX1-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: var_shuffle_v8i16_v8i16_xyxyxy00_i16:
+; AVX2: # BB#0:
+; AVX2-NEXT: movswq %di, %r10
+; AVX2-NEXT: movswq %si, %r11
+; AVX2-NEXT: movswq %dx, %rdx
+; AVX2-NEXT: movswq %cx, %rcx
+; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movswq %r8w, %rdi
+; AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT: movswq %r9w, %rax
+; AVX2-NEXT: movzwl -40(%rsp,%r10,2), %esi
+; AVX2-NEXT: vmovd %esi, %xmm0
+; AVX2-NEXT: vpinsrw $1, -24(%rsp,%r11,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $2, -40(%rsp,%rdx,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $4, -40(%rsp,%rdi,2), %xmm0, %xmm0
+; AVX2-NEXT: vpinsrw $5, -24(%rsp,%rax,2), %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; AVX2-NEXT: retq
%x0 = extractelement <8 x i16> %x, i16 %i0
%y1 = extractelement <8 x i16> %y, i16 %i1
%x2 = extractelement <8 x i16> %x, i16 %i2