author    Craig Topper <craig.topper@intel.com>  2019-02-22 07:03:25 +0000
committer Craig Topper <craig.topper@intel.com>  2019-02-22 07:03:25 +0000
commit fa6187d230651cc7334d224e3e86720331be7681 (patch)
tree   db693f996eb6fac869b038f10ea5f1acb69d1ce0 /llvm/test/CodeGen/X86
parent 069cf05e87325a145a4941e5959ed814600cd5a4 (diff)
[LegalizeVectorOps] Improve the placement of ANDs in the ExpandLoad path for non-byte-sized loads.
When we need to merge two adjacent loads, the AND mask for the low piece was still sized for the full source element size, but we didn't have that many bits: the upper bits are already zero due to the SRL. So we can skip the AND on the low piece if we're going to combine it with the high bits.

We do need an AND to clear out any stray bits from the high part. We were ANDing the high part before combining it with the low part, but it looks like ANDing after the OR gets better results. So we can just emit the final AND after the optional concatenation is done. That handles both skipping the AND before the OR and clearing the extra high bits after the OR.

llvm-svn: 354655
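For illustration, here is a minimal scalar C++ sketch of what the improved lowering computes for zext_4i17_to_4i32 in the tests below (this is not LLVM code; the function signature, byte layout, and memcpy-based loads are assumptions made for the example). The point to notice is that no piece is masked before the OR that assembles the straddling lane: the SRL already zero-extended its low piece, and a single AND per lane after everything is combined clears the rest.

#include <cstdint>
#include <cstring>

// Scalar model of the lowering: four 17-bit lanes packed into 68 bits.
void zext_4i17_to_4i32(const uint8_t *src, uint32_t out[4]) {
  uint64_t lo;
  uint32_t hi;
  std::memcpy(&lo, src, 8);      // bits [63:0] of the packed vector
  std::memcpy(&hi, src + 8, 4);  // holds bits [67:64], the tail of lane 3

  out[0] = (uint32_t)lo;                        // lane 0: bits [16:0]
  out[1] = (uint32_t)(lo >> 17);                // lane 1: bits [33:17]
  out[2] = (uint32_t)(lo >> 34);                // lane 2: bits [50:34]
  out[3] = (uint32_t)(lo >> 51) | (hi << 13);   // lane 3: low piece OR high piece

  // One mask per lane, emitted after the OR. The low piece of lane 3
  // (lo >> 51) is only 13 bits wide, so it needs no mask before the OR;
  // this single AND also clears the bits the unmasked `hi << 13` brings
  // in above bit 16 (hence the removed `andl $15`) and the neighbouring
  // lane bits left in lanes 0-2 (hence the removed per-lane `andl $131071`).
  for (int i = 0; i < 4; ++i)
    out[i] &= 0x1FFFF;  // 0x1FFFF = 2^17 - 1, the i17 lane mask
}

In the vector output below, that final per-lane mask becomes a single PAND/VPAND against a constant-pool or broadcast vector of 131071, replacing several scalar ANDs on the critical path.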
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r-- llvm/test/CodeGen/X86/vector-zext-widen.ll | 152
-rw-r--r-- llvm/test/CodeGen/X86/vector-zext.ll       | 152
2 files changed, 174 insertions, 130 deletions
diff --git a/llvm/test/CodeGen/X86/vector-zext-widen.ll b/llvm/test/CodeGen/X86/vector-zext-widen.ll
index 5e9c2a6b111..4d7a4f36659 100644
--- a/llvm/test/CodeGen/X86/vector-zext-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-zext-widen.ll
@@ -2285,58 +2285,49 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
; SSE2-LABEL: zext_4i17_to_4i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movl 8(%rdi), %eax
-; SSE2-NEXT: andl $15, %eax
-; SSE2-NEXT: shll $13, %eax
-; SSE2-NEXT: movq (%rdi), %rcx
-; SSE2-NEXT: movq %rcx, %rdx
-; SSE2-NEXT: shrq $51, %rdx
-; SSE2-NEXT: orl %eax, %edx
-; SSE2-NEXT: movd %edx, %xmm0
-; SSE2-NEXT: movq %rcx, %rax
-; SSE2-NEXT: shrq $34, %rax
-; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: movl %ecx, %eax
-; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF
+; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq $17, %rcx
-; SSE2-NEXT: andl $131071, %ecx # imm = 0x1FFFF
-; SSE2-NEXT: movd %ecx, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movl 8(%rdi), %ecx
+; SSE2-NEXT: shll $13, %ecx
+; SSE2-NEXT: movq %rax, %rdx
+; SSE2-NEXT: shrq $51, %rdx
+; SSE2-NEXT: orl %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: shrq $34, %rax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i17_to_4i32:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movl 8(%rdi), %eax
-; SSSE3-NEXT: andl $15, %eax
-; SSSE3-NEXT: shll $13, %eax
-; SSSE3-NEXT: movq (%rdi), %rcx
-; SSSE3-NEXT: movq %rcx, %rdx
-; SSSE3-NEXT: shrq $51, %rdx
-; SSSE3-NEXT: orl %eax, %edx
-; SSSE3-NEXT: movd %edx, %xmm0
-; SSSE3-NEXT: movq %rcx, %rax
-; SSSE3-NEXT: shrq $34, %rax
-; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSSE3-NEXT: movl %ecx, %eax
-; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF
+; SSSE3-NEXT: movq (%rdi), %rax
; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: movq %rax, %rcx
; SSSE3-NEXT: shrq $17, %rcx
-; SSSE3-NEXT: andl $131071, %ecx # imm = 0x1FFFF
-; SSSE3-NEXT: movd %ecx, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movl 8(%rdi), %ecx
+; SSSE3-NEXT: shll $13, %ecx
+; SSSE3-NEXT: movq %rax, %rdx
+; SSSE3-NEXT: shrq $51, %rdx
+; SSSE3-NEXT: orl %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: shrq $34, %rax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i17_to_4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movl 8(%rdi), %eax
-; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: shll $13, %eax
; SSE41-NEXT: movq (%rdi), %rcx
; SSE41-NEXT: movq %rcx, %rdx
@@ -2344,38 +2335,69 @@ define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
; SSE41-NEXT: orl %eax, %edx
; SSE41-NEXT: movq %rcx, %rax
; SSE41-NEXT: shrq $17, %rax
-; SSE41-NEXT: andl $131071, %eax # imm = 0x1FFFF
-; SSE41-NEXT: movl %ecx, %esi
-; SSE41-NEXT: andl $131071, %esi # imm = 0x1FFFF
-; SSE41-NEXT: movd %esi, %xmm0
+; SSE41-NEXT: movd %ecx, %xmm0
; SSE41-NEXT: pinsrd $1, %eax, %xmm0
; SSE41-NEXT: shrq $34, %rcx
-; SSE41-NEXT: andl $131071, %ecx # imm = 0x1FFFF
; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
; SSE41-NEXT: pinsrd $3, %edx, %xmm0
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: zext_4i17_to_4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: movl 8(%rdi), %eax
-; AVX-NEXT: andl $15, %eax
-; AVX-NEXT: shll $13, %eax
-; AVX-NEXT: movq (%rdi), %rcx
-; AVX-NEXT: movq %rcx, %rdx
-; AVX-NEXT: shrq $51, %rdx
-; AVX-NEXT: orl %eax, %edx
-; AVX-NEXT: movq %rcx, %rax
-; AVX-NEXT: shrq $17, %rax
-; AVX-NEXT: andl $131071, %eax # imm = 0x1FFFF
-; AVX-NEXT: movl %ecx, %esi
-; AVX-NEXT: andl $131071, %esi # imm = 0x1FFFF
-; AVX-NEXT: vmovd %esi, %xmm0
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX-NEXT: shrq $34, %rcx
-; AVX-NEXT: andl $131071, %ecx # imm = 0x1FFFF
-; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: zext_4i17_to_4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: movl 8(%rdi), %eax
+; AVX1-NEXT: shll $13, %eax
+; AVX1-NEXT: movq (%rdi), %rcx
+; AVX1-NEXT: movq %rcx, %rdx
+; AVX1-NEXT: shrq $51, %rdx
+; AVX1-NEXT: orl %eax, %edx
+; AVX1-NEXT: movq %rcx, %rax
+; AVX1-NEXT: shrq $17, %rax
+; AVX1-NEXT: vmovd %ecx, %xmm0
+; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX1-NEXT: shrq $34, %rcx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zext_4i17_to_4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl 8(%rdi), %eax
+; AVX2-NEXT: shll $13, %eax
+; AVX2-NEXT: movq (%rdi), %rcx
+; AVX2-NEXT: movq %rcx, %rdx
+; AVX2-NEXT: shrq $51, %rdx
+; AVX2-NEXT: orl %eax, %edx
+; AVX2-NEXT: movq %rcx, %rax
+; AVX2-NEXT: shrq $17, %rax
+; AVX2-NEXT: vmovd %ecx, %xmm0
+; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX2-NEXT: shrq $34, %rcx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: zext_4i17_to_4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl 8(%rdi), %eax
+; AVX512-NEXT: shll $13, %eax
+; AVX512-NEXT: movq (%rdi), %rcx
+; AVX512-NEXT: movq %rcx, %rdx
+; AVX512-NEXT: shrq $51, %rdx
+; AVX512-NEXT: orl %eax, %edx
+; AVX512-NEXT: movq %rcx, %rax
+; AVX512-NEXT: shrq $17, %rax
+; AVX512-NEXT: vmovd %ecx, %xmm0
+; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512-NEXT: shrq $34, %rcx
+; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%a = load <4 x i17>, <4 x i17>* %ptr
%b = zext <4 x i17> %a to <4 x i32>
ret <4 x i32> %b
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 80f9022eead..d1983483412 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2299,58 +2299,49 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) {
define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
; SSE2-LABEL: zext_4i17_to_4i32:
; SSE2: # %bb.0:
-; SSE2-NEXT: movl 8(%rdi), %eax
-; SSE2-NEXT: andl $15, %eax
-; SSE2-NEXT: shll $13, %eax
-; SSE2-NEXT: movq (%rdi), %rcx
-; SSE2-NEXT: movq %rcx, %rdx
-; SSE2-NEXT: shrq $51, %rdx
-; SSE2-NEXT: orl %eax, %edx
-; SSE2-NEXT: movd %edx, %xmm0
-; SSE2-NEXT: movq %rcx, %rax
-; SSE2-NEXT: shrq $34, %rax
-; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: movl %ecx, %eax
-; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF
+; SSE2-NEXT: movq (%rdi), %rax
; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq $17, %rcx
-; SSE2-NEXT: andl $131071, %ecx # imm = 0x1FFFF
-; SSE2-NEXT: movd %ecx, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movl 8(%rdi), %ecx
+; SSE2-NEXT: shll $13, %ecx
+; SSE2-NEXT: movq %rax, %rdx
+; SSE2-NEXT: shrq $51, %rdx
+; SSE2-NEXT: orl %ecx, %edx
+; SSE2-NEXT: movd %edx, %xmm1
+; SSE2-NEXT: shrq $34, %rax
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: zext_4i17_to_4i32:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: movl 8(%rdi), %eax
-; SSSE3-NEXT: andl $15, %eax
-; SSSE3-NEXT: shll $13, %eax
-; SSSE3-NEXT: movq (%rdi), %rcx
-; SSSE3-NEXT: movq %rcx, %rdx
-; SSSE3-NEXT: shrq $51, %rdx
-; SSSE3-NEXT: orl %eax, %edx
-; SSSE3-NEXT: movd %edx, %xmm0
-; SSSE3-NEXT: movq %rcx, %rax
-; SSSE3-NEXT: shrq $34, %rax
-; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF
-; SSSE3-NEXT: movd %eax, %xmm1
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSSE3-NEXT: movl %ecx, %eax
-; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF
+; SSSE3-NEXT: movq (%rdi), %rax
; SSSE3-NEXT: movd %eax, %xmm0
+; SSSE3-NEXT: movq %rax, %rcx
; SSSE3-NEXT: shrq $17, %rcx
-; SSSE3-NEXT: andl $131071, %ecx # imm = 0x1FFFF
-; SSSE3-NEXT: movd %ecx, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movd %ecx, %xmm1
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movl 8(%rdi), %ecx
+; SSSE3-NEXT: shll $13, %ecx
+; SSSE3-NEXT: movq %rax, %rdx
+; SSSE3-NEXT: shrq $51, %rdx
+; SSSE3-NEXT: orl %ecx, %edx
+; SSSE3-NEXT: movd %edx, %xmm1
+; SSSE3-NEXT: shrq $34, %rax
+; SSSE3-NEXT: movd %eax, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: zext_4i17_to_4i32:
; SSE41: # %bb.0:
; SSE41-NEXT: movl 8(%rdi), %eax
-; SSE41-NEXT: andl $15, %eax
; SSE41-NEXT: shll $13, %eax
; SSE41-NEXT: movq (%rdi), %rcx
; SSE41-NEXT: movq %rcx, %rdx
@@ -2358,38 +2349,69 @@ define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) {
; SSE41-NEXT: orl %eax, %edx
; SSE41-NEXT: movq %rcx, %rax
; SSE41-NEXT: shrq $17, %rax
-; SSE41-NEXT: andl $131071, %eax # imm = 0x1FFFF
-; SSE41-NEXT: movl %ecx, %esi
-; SSE41-NEXT: andl $131071, %esi # imm = 0x1FFFF
-; SSE41-NEXT: movd %esi, %xmm0
+; SSE41-NEXT: movd %ecx, %xmm0
; SSE41-NEXT: pinsrd $1, %eax, %xmm0
; SSE41-NEXT: shrq $34, %rcx
-; SSE41-NEXT: andl $131071, %ecx # imm = 0x1FFFF
; SSE41-NEXT: pinsrd $2, %ecx, %xmm0
; SSE41-NEXT: pinsrd $3, %edx, %xmm0
+; SSE41-NEXT: pand {{.*}}(%rip), %xmm0
; SSE41-NEXT: retq
;
-; AVX-LABEL: zext_4i17_to_4i32:
-; AVX: # %bb.0:
-; AVX-NEXT: movl 8(%rdi), %eax
-; AVX-NEXT: andl $15, %eax
-; AVX-NEXT: shll $13, %eax
-; AVX-NEXT: movq (%rdi), %rcx
-; AVX-NEXT: movq %rcx, %rdx
-; AVX-NEXT: shrq $51, %rdx
-; AVX-NEXT: orl %eax, %edx
-; AVX-NEXT: movq %rcx, %rax
-; AVX-NEXT: shrq $17, %rax
-; AVX-NEXT: andl $131071, %eax # imm = 0x1FFFF
-; AVX-NEXT: movl %ecx, %esi
-; AVX-NEXT: andl $131071, %esi # imm = 0x1FFFF
-; AVX-NEXT: vmovd %esi, %xmm0
-; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
-; AVX-NEXT: shrq $34, %rcx
-; AVX-NEXT: andl $131071, %ecx # imm = 0x1FFFF
-; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
-; AVX-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: zext_4i17_to_4i32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: movl 8(%rdi), %eax
+; AVX1-NEXT: shll $13, %eax
+; AVX1-NEXT: movq (%rdi), %rcx
+; AVX1-NEXT: movq %rcx, %rdx
+; AVX1-NEXT: shrq $51, %rdx
+; AVX1-NEXT: orl %eax, %edx
+; AVX1-NEXT: movq %rcx, %rax
+; AVX1-NEXT: shrq $17, %rax
+; AVX1-NEXT: vmovd %ecx, %xmm0
+; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX1-NEXT: shrq $34, %rcx
+; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX1-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: zext_4i17_to_4i32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl 8(%rdi), %eax
+; AVX2-NEXT: shll $13, %eax
+; AVX2-NEXT: movq (%rdi), %rcx
+; AVX2-NEXT: movq %rcx, %rdx
+; AVX2-NEXT: shrq $51, %rdx
+; AVX2-NEXT: orl %eax, %edx
+; AVX2-NEXT: movq %rcx, %rax
+; AVX2-NEXT: shrq $17, %rax
+; AVX2-NEXT: vmovd %ecx, %xmm0
+; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX2-NEXT: shrq $34, %rcx
+; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX2-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: zext_4i17_to_4i32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: movl 8(%rdi), %eax
+; AVX512-NEXT: shll $13, %eax
+; AVX512-NEXT: movq (%rdi), %rcx
+; AVX512-NEXT: movq %rcx, %rdx
+; AVX512-NEXT: shrq $51, %rdx
+; AVX512-NEXT: orl %eax, %edx
+; AVX512-NEXT: movq %rcx, %rax
+; AVX512-NEXT: shrq $17, %rax
+; AVX512-NEXT: vmovd %ecx, %xmm0
+; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX512-NEXT: shrq $34, %rcx
+; AVX512-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; AVX512-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [131071,131071,131071,131071]
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: retq
%a = load <4 x i17>, <4 x i17>* %ptr
%b = zext <4 x i17> %a to <4 x i32>
ret <4 x i32> %b