diff options
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-sext-widen.ll | 160 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-sext.ll | 160 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-zext-widen.ll | 99 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-zext.ll | 99 |
4 files changed, 518 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-sext-widen.ll b/llvm/test/CodeGen/X86/vector-sext-widen.ll index 5cd814dbfdf..f8c55bb26e4 100644 --- a/llvm/test/CodeGen/X86/vector-sext-widen.ll +++ b/llvm/test/CodeGen/X86/vector-sext-widen.ll @@ -5897,3 +5897,163 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) { ret <2 x i32>%z } +define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { +; SSE2-LABEL: sext_4i17_to_4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: shlq $30, %rcx +; SSE2-NEXT: sarq $47, %rcx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: shlq $47, %rcx +; SSE2-NEXT: sarq $47, %rcx +; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movl 8(%rdi), %ecx +; SSE2-NEXT: shll $13, %ecx +; SSE2-NEXT: movq %rax, %rdx +; SSE2-NEXT: shrq $51, %rdx +; SSE2-NEXT: orl %ecx, %edx +; SSE2-NEXT: shlq $47, %rdx +; SSE2-NEXT: sarq $47, %rdx +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: shlq $13, %rax +; SSE2-NEXT: sarq $47, %rax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_4i17_to_4i32: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movq (%rdi), %rax +; SSSE3-NEXT: movq %rax, %rcx +; SSSE3-NEXT: shlq $30, %rcx +; SSSE3-NEXT: sarq $47, %rcx +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: movq %rax, %rcx +; SSSE3-NEXT: shlq $47, %rcx +; SSSE3-NEXT: sarq $47, %rcx +; SSSE3-NEXT: movd %ecx, %xmm0 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: movl 8(%rdi), %ecx +; SSSE3-NEXT: shll $13, %ecx +; SSSE3-NEXT: movq %rax, %rdx +; SSSE3-NEXT: shrq $51, %rdx +; SSSE3-NEXT: orl %ecx, %edx +; SSSE3-NEXT: shlq $47, %rdx +; SSSE3-NEXT: sarq $47, %rdx +; SSSE3-NEXT: movd %edx, %xmm1 +; SSSE3-NEXT: shlq $13, %rax +; SSSE3-NEXT: sarq $47, %rax +; SSSE3-NEXT: movd %eax, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_4i17_to_4i32: +; SSE41: # %bb.0: +; SSE41-NEXT: movq (%rdi), %rax +; SSE41-NEXT: movq %rax, %rcx +; SSE41-NEXT: shlq $30, %rcx +; SSE41-NEXT: sarq $47, %rcx +; SSE41-NEXT: movq %rax, %rdx +; SSE41-NEXT: shlq $47, %rdx +; SSE41-NEXT: sarq $47, %rdx +; SSE41-NEXT: movd %edx, %xmm0 +; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 +; SSE41-NEXT: movq %rax, %rcx +; SSE41-NEXT: shlq $13, %rcx +; SSE41-NEXT: sarq $47, %rcx +; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 +; SSE41-NEXT: movl 8(%rdi), %ecx +; SSE41-NEXT: shll $13, %ecx +; SSE41-NEXT: shrq $51, %rax +; SSE41-NEXT: orl %ecx, %eax +; SSE41-NEXT: shlq $47, %rax +; SSE41-NEXT: sarq $47, %rax +; SSE41-NEXT: pinsrd $3, %eax, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: sext_4i17_to_4i32: +; AVX: # %bb.0: +; AVX-NEXT: movq (%rdi), %rax +; AVX-NEXT: movq %rax, %rcx +; AVX-NEXT: shlq $30, %rcx +; AVX-NEXT: sarq $47, %rcx +; AVX-NEXT: movq %rax, %rdx +; AVX-NEXT: shlq $47, %rdx +; AVX-NEXT: sarq $47, %rdx +; AVX-NEXT: vmovd %edx, %xmm0 +; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX-NEXT: movq %rax, %rcx +; AVX-NEXT: shlq $13, %rcx +; AVX-NEXT: sarq $47, %rcx +; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 +; AVX-NEXT: movl 8(%rdi), %ecx +; AVX-NEXT: shll $13, %ecx +; AVX-NEXT: shrq $51, %rax +; AVX-NEXT: orl %ecx, %eax +; AVX-NEXT: shlq $47, %rax +; AVX-NEXT: sarq $47, %rax +; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X32-SSE2-LABEL: sext_4i17_to_4i32: +; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE2-NEXT: movl (%eax), %ecx +; X32-SSE2-NEXT: movl 4(%eax), %edx +; X32-SSE2-NEXT: movl 8(%eax), %eax +; X32-SSE2-NEXT: shldl $13, %edx, %eax +; X32-SSE2-NEXT: shll $15, %eax +; X32-SSE2-NEXT: sarl $15, %eax +; X32-SSE2-NEXT: movd %eax, %xmm0 +; X32-SSE2-NEXT: movl %edx, %eax +; X32-SSE2-NEXT: shll $13, %eax +; X32-SSE2-NEXT: sarl $15, %eax +; X32-SSE2-NEXT: movd %eax, %xmm1 +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-SSE2-NEXT: shldl $15, %ecx, %edx +; X32-SSE2-NEXT: shll $15, %ecx +; X32-SSE2-NEXT: sarl $15, %ecx +; X32-SSE2-NEXT: movd %ecx, %xmm0 +; X32-SSE2-NEXT: shll $15, %edx +; X32-SSE2-NEXT: sarl $15, %edx +; X32-SSE2-NEXT: movd %edx, %xmm2 +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-SSE2-NEXT: retl +; +; X32-SSE41-LABEL: sext_4i17_to_4i32: +; X32-SSE41: # %bb.0: +; X32-SSE41-NEXT: pushl %esi +; X32-SSE41-NEXT: .cfi_def_cfa_offset 8 +; X32-SSE41-NEXT: .cfi_offset %esi, -8 +; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE41-NEXT: movl (%eax), %ecx +; X32-SSE41-NEXT: movl 4(%eax), %edx +; X32-SSE41-NEXT: movl %edx, %esi +; X32-SSE41-NEXT: movl 8(%eax), %eax +; X32-SSE41-NEXT: shldl $13, %edx, %eax +; X32-SSE41-NEXT: shldl $15, %ecx, %edx +; X32-SSE41-NEXT: shll $15, %edx +; X32-SSE41-NEXT: sarl $15, %edx +; X32-SSE41-NEXT: shll $15, %ecx +; X32-SSE41-NEXT: sarl $15, %ecx +; X32-SSE41-NEXT: movd %ecx, %xmm0 +; X32-SSE41-NEXT: pinsrd $1, %edx, %xmm0 +; X32-SSE41-NEXT: shll $13, %esi +; X32-SSE41-NEXT: sarl $15, %esi +; X32-SSE41-NEXT: pinsrd $2, %esi, %xmm0 +; X32-SSE41-NEXT: shll $15, %eax +; X32-SSE41-NEXT: sarl $15, %eax +; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 +; X32-SSE41-NEXT: popl %esi +; X32-SSE41-NEXT: .cfi_def_cfa_offset 4 +; X32-SSE41-NEXT: retl + %a = load <4 x i17>, <4 x i17>* %ptr + %b = sext <4 x i17> %a to <4 x i32> + ret <4 x i32> %b +} diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index a5784f326d6..5449552d195 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -5915,3 +5915,163 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x i8>* %addr) { ret <2 x i32>%z } +define <4 x i32> @sext_4i17_to_4i32(<4 x i17>* %ptr) { +; SSE2-LABEL: sext_4i17_to_4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movq (%rdi), %rax +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: shlq $30, %rcx +; SSE2-NEXT: sarq $47, %rcx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: movq %rax, %rcx +; SSE2-NEXT: shlq $47, %rcx +; SSE2-NEXT: sarq $47, %rcx +; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: movl 8(%rdi), %ecx +; SSE2-NEXT: shll $13, %ecx +; SSE2-NEXT: movq %rax, %rdx +; SSE2-NEXT: shrq $51, %rdx +; SSE2-NEXT: orl %ecx, %edx +; SSE2-NEXT: shlq $47, %rdx +; SSE2-NEXT: sarq $47, %rdx +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: shlq $13, %rax +; SSE2-NEXT: sarq $47, %rax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_4i17_to_4i32: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movq (%rdi), %rax +; SSSE3-NEXT: movq %rax, %rcx +; SSSE3-NEXT: shlq $30, %rcx +; SSSE3-NEXT: sarq $47, %rcx +; SSSE3-NEXT: movd %ecx, %xmm1 +; SSSE3-NEXT: movq %rax, %rcx +; SSSE3-NEXT: shlq $47, %rcx +; SSSE3-NEXT: sarq $47, %rcx +; SSSE3-NEXT: movd %ecx, %xmm0 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSSE3-NEXT: movl 8(%rdi), %ecx +; SSSE3-NEXT: shll $13, %ecx +; SSSE3-NEXT: movq %rax, %rdx +; SSSE3-NEXT: shrq $51, %rdx +; SSSE3-NEXT: orl %ecx, %edx +; SSSE3-NEXT: shlq $47, %rdx +; SSSE3-NEXT: sarq $47, %rdx +; SSSE3-NEXT: movd %edx, %xmm1 +; SSSE3-NEXT: shlq $13, %rax +; SSSE3-NEXT: sarq $47, %rax +; SSSE3-NEXT: movd %eax, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_4i17_to_4i32: +; SSE41: # %bb.0: +; SSE41-NEXT: movq (%rdi), %rax +; SSE41-NEXT: movq %rax, %rcx +; SSE41-NEXT: shlq $30, %rcx +; SSE41-NEXT: sarq $47, %rcx +; SSE41-NEXT: movq %rax, %rdx +; SSE41-NEXT: shlq $47, %rdx +; SSE41-NEXT: sarq $47, %rdx +; SSE41-NEXT: movd %edx, %xmm0 +; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 +; SSE41-NEXT: movq %rax, %rcx +; SSE41-NEXT: shlq $13, %rcx +; SSE41-NEXT: sarq $47, %rcx +; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 +; SSE41-NEXT: movl 8(%rdi), %ecx +; SSE41-NEXT: shll $13, %ecx +; SSE41-NEXT: shrq $51, %rax +; SSE41-NEXT: orl %ecx, %eax +; SSE41-NEXT: shlq $47, %rax +; SSE41-NEXT: sarq $47, %rax +; SSE41-NEXT: pinsrd $3, %eax, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: sext_4i17_to_4i32: +; AVX: # %bb.0: +; AVX-NEXT: movq (%rdi), %rax +; AVX-NEXT: movq %rax, %rcx +; AVX-NEXT: shlq $30, %rcx +; AVX-NEXT: sarq $47, %rcx +; AVX-NEXT: movq %rax, %rdx +; AVX-NEXT: shlq $47, %rdx +; AVX-NEXT: sarq $47, %rdx +; AVX-NEXT: vmovd %edx, %xmm0 +; AVX-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX-NEXT: movq %rax, %rcx +; AVX-NEXT: shlq $13, %rcx +; AVX-NEXT: sarq $47, %rcx +; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 +; AVX-NEXT: movl 8(%rdi), %ecx +; AVX-NEXT: shll $13, %ecx +; AVX-NEXT: shrq $51, %rax +; AVX-NEXT: orl %ecx, %eax +; AVX-NEXT: shlq $47, %rax +; AVX-NEXT: sarq $47, %rax +; AVX-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; X32-SSE2-LABEL: sext_4i17_to_4i32: +; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE2-NEXT: movl (%eax), %ecx +; X32-SSE2-NEXT: movl 4(%eax), %edx +; X32-SSE2-NEXT: movl 8(%eax), %eax +; X32-SSE2-NEXT: shldl $13, %edx, %eax +; X32-SSE2-NEXT: shll $15, %eax +; X32-SSE2-NEXT: sarl $15, %eax +; X32-SSE2-NEXT: movd %eax, %xmm0 +; X32-SSE2-NEXT: movl %edx, %eax +; X32-SSE2-NEXT: shll $13, %eax +; X32-SSE2-NEXT: sarl $15, %eax +; X32-SSE2-NEXT: movd %eax, %xmm1 +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X32-SSE2-NEXT: shldl $15, %ecx, %edx +; X32-SSE2-NEXT: shll $15, %ecx +; X32-SSE2-NEXT: sarl $15, %ecx +; X32-SSE2-NEXT: movd %ecx, %xmm0 +; X32-SSE2-NEXT: shll $15, %edx +; X32-SSE2-NEXT: sarl $15, %edx +; X32-SSE2-NEXT: movd %edx, %xmm2 +; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-SSE2-NEXT: retl +; +; X32-SSE41-LABEL: sext_4i17_to_4i32: +; X32-SSE41: # %bb.0: +; X32-SSE41-NEXT: pushl %esi +; X32-SSE41-NEXT: .cfi_def_cfa_offset 8 +; X32-SSE41-NEXT: .cfi_offset %esi, -8 +; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE41-NEXT: movl (%eax), %ecx +; X32-SSE41-NEXT: movl 4(%eax), %edx +; X32-SSE41-NEXT: movl %edx, %esi +; X32-SSE41-NEXT: movl 8(%eax), %eax +; X32-SSE41-NEXT: shldl $13, %edx, %eax +; X32-SSE41-NEXT: shldl $15, %ecx, %edx +; X32-SSE41-NEXT: shll $15, %edx +; X32-SSE41-NEXT: sarl $15, %edx +; X32-SSE41-NEXT: shll $15, %ecx +; X32-SSE41-NEXT: sarl $15, %ecx +; X32-SSE41-NEXT: movd %ecx, %xmm0 +; X32-SSE41-NEXT: pinsrd $1, %edx, %xmm0 +; X32-SSE41-NEXT: shll $13, %esi +; X32-SSE41-NEXT: sarl $15, %esi +; X32-SSE41-NEXT: pinsrd $2, %esi, %xmm0 +; X32-SSE41-NEXT: shll $15, %eax +; X32-SSE41-NEXT: sarl $15, %eax +; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 +; X32-SSE41-NEXT: popl %esi +; X32-SSE41-NEXT: .cfi_def_cfa_offset 4 +; X32-SSE41-NEXT: retl + %a = load <4 x i17>, <4 x i17>* %ptr + %b = sext <4 x i17> %a to <4 x i32> + ret <4 x i32> %b +} diff --git a/llvm/test/CodeGen/X86/vector-zext-widen.ll b/llvm/test/CodeGen/X86/vector-zext-widen.ll index 2eafc7cdaf0..5e9c2a6b111 100644 --- a/llvm/test/CodeGen/X86/vector-zext-widen.ll +++ b/llvm/test/CodeGen/X86/vector-zext-widen.ll @@ -2281,3 +2281,102 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) { %z = add <2 x i32>%y, %y ret <2 x i32>%z } + +define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) { +; SSE2-LABEL: zext_4i17_to_4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movl 8(%rdi), %eax +; SSE2-NEXT: andl $15, %eax +; SSE2-NEXT: shll $13, %eax +; SSE2-NEXT: movq (%rdi), %rcx +; SSE2-NEXT: movq %rcx, %rdx +; SSE2-NEXT: shrq $51, %rdx +; SSE2-NEXT: orl %eax, %edx +; SSE2-NEXT: movd %edx, %xmm0 +; SSE2-NEXT: movq %rcx, %rax +; SSE2-NEXT: shrq $34, %rax +; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movl %ecx, %eax +; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: shrq $17, %rcx +; SSE2-NEXT: andl $131071, %ecx # imm = 0x1FFFF +; SSE2-NEXT: movd %ecx, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_4i17_to_4i32: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movl 8(%rdi), %eax +; SSSE3-NEXT: andl $15, %eax +; SSSE3-NEXT: shll $13, %eax +; SSSE3-NEXT: movq (%rdi), %rcx +; SSSE3-NEXT: movq %rcx, %rdx +; SSSE3-NEXT: shrq $51, %rdx +; SSSE3-NEXT: orl %eax, %edx +; SSSE3-NEXT: movd %edx, %xmm0 +; SSSE3-NEXT: movq %rcx, %rax +; SSSE3-NEXT: shrq $34, %rax +; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSSE3-NEXT: movd %eax, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movl %ecx, %eax +; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSSE3-NEXT: movd %eax, %xmm0 +; SSSE3-NEXT: shrq $17, %rcx +; SSSE3-NEXT: andl $131071, %ecx # imm = 0x1FFFF +; SSSE3-NEXT: movd %ecx, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_4i17_to_4i32: +; SSE41: # %bb.0: +; SSE41-NEXT: movl 8(%rdi), %eax +; SSE41-NEXT: andl $15, %eax +; SSE41-NEXT: shll $13, %eax +; SSE41-NEXT: movq (%rdi), %rcx +; SSE41-NEXT: movq %rcx, %rdx +; SSE41-NEXT: shrq $51, %rdx +; SSE41-NEXT: orl %eax, %edx +; SSE41-NEXT: movq %rcx, %rax +; SSE41-NEXT: shrq $17, %rax +; SSE41-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSE41-NEXT: movl %ecx, %esi +; SSE41-NEXT: andl $131071, %esi # imm = 0x1FFFF +; SSE41-NEXT: movd %esi, %xmm0 +; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: shrq $34, %rcx +; SSE41-NEXT: andl $131071, %ecx # imm = 0x1FFFF +; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 +; SSE41-NEXT: pinsrd $3, %edx, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: zext_4i17_to_4i32: +; AVX: # %bb.0: +; AVX-NEXT: movl 8(%rdi), %eax +; AVX-NEXT: andl $15, %eax +; AVX-NEXT: shll $13, %eax +; AVX-NEXT: movq (%rdi), %rcx +; AVX-NEXT: movq %rcx, %rdx +; AVX-NEXT: shrq $51, %rdx +; AVX-NEXT: orl %eax, %edx +; AVX-NEXT: movq %rcx, %rax +; AVX-NEXT: shrq $17, %rax +; AVX-NEXT: andl $131071, %eax # imm = 0x1FFFF +; AVX-NEXT: movl %ecx, %esi +; AVX-NEXT: andl $131071, %esi # imm = 0x1FFFF +; AVX-NEXT: vmovd %esi, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: shrq $34, %rcx +; AVX-NEXT: andl $131071, %ecx # imm = 0x1FFFF +; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 +; AVX-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 +; AVX-NEXT: retq + %a = load <4 x i17>, <4 x i17>* %ptr + %b = zext <4 x i17> %a to <4 x i32> + ret <4 x i32> %b +} diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index 6de913079a5..80f9022eead 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -2295,3 +2295,102 @@ define <2 x i32> @zext_2i8_to_2i32(<2 x i8>* %addr) { %z = add <2 x i32>%y, %y ret <2 x i32>%z } + +define <4 x i32> @zext_4i17_to_4i32(<4 x i17>* %ptr) { +; SSE2-LABEL: zext_4i17_to_4i32: +; SSE2: # %bb.0: +; SSE2-NEXT: movl 8(%rdi), %eax +; SSE2-NEXT: andl $15, %eax +; SSE2-NEXT: shll $13, %eax +; SSE2-NEXT: movq (%rdi), %rcx +; SSE2-NEXT: movq %rcx, %rdx +; SSE2-NEXT: shrq $51, %rdx +; SSE2-NEXT: orl %eax, %edx +; SSE2-NEXT: movd %edx, %xmm0 +; SSE2-NEXT: movq %rcx, %rax +; SSE2-NEXT: shrq $34, %rax +; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movl %ecx, %eax +; SSE2-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: shrq $17, %rcx +; SSE2-NEXT: andl $131071, %ecx # imm = 0x1FFFF +; SSE2-NEXT: movd %ecx, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: retq +; +; SSSE3-LABEL: zext_4i17_to_4i32: +; SSSE3: # %bb.0: +; SSSE3-NEXT: movl 8(%rdi), %eax +; SSSE3-NEXT: andl $15, %eax +; SSSE3-NEXT: shll $13, %eax +; SSSE3-NEXT: movq (%rdi), %rcx +; SSSE3-NEXT: movq %rcx, %rdx +; SSSE3-NEXT: shrq $51, %rdx +; SSSE3-NEXT: orl %eax, %edx +; SSSE3-NEXT: movd %edx, %xmm0 +; SSSE3-NEXT: movq %rcx, %rax +; SSSE3-NEXT: shrq $34, %rax +; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSSE3-NEXT: movd %eax, %xmm1 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSSE3-NEXT: movl %ecx, %eax +; SSSE3-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSSE3-NEXT: movd %eax, %xmm0 +; SSSE3-NEXT: shrq $17, %rcx +; SSSE3-NEXT: andl $131071, %ecx # imm = 0x1FFFF +; SSSE3-NEXT: movd %ecx, %xmm2 +; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: zext_4i17_to_4i32: +; SSE41: # %bb.0: +; SSE41-NEXT: movl 8(%rdi), %eax +; SSE41-NEXT: andl $15, %eax +; SSE41-NEXT: shll $13, %eax +; SSE41-NEXT: movq (%rdi), %rcx +; SSE41-NEXT: movq %rcx, %rdx +; SSE41-NEXT: shrq $51, %rdx +; SSE41-NEXT: orl %eax, %edx +; SSE41-NEXT: movq %rcx, %rax +; SSE41-NEXT: shrq $17, %rax +; SSE41-NEXT: andl $131071, %eax # imm = 0x1FFFF +; SSE41-NEXT: movl %ecx, %esi +; SSE41-NEXT: andl $131071, %esi # imm = 0x1FFFF +; SSE41-NEXT: movd %esi, %xmm0 +; SSE41-NEXT: pinsrd $1, %eax, %xmm0 +; SSE41-NEXT: shrq $34, %rcx +; SSE41-NEXT: andl $131071, %ecx # imm = 0x1FFFF +; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 +; SSE41-NEXT: pinsrd $3, %edx, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: zext_4i17_to_4i32: +; AVX: # %bb.0: +; AVX-NEXT: movl 8(%rdi), %eax +; AVX-NEXT: andl $15, %eax +; AVX-NEXT: shll $13, %eax +; AVX-NEXT: movq (%rdi), %rcx +; AVX-NEXT: movq %rcx, %rdx +; AVX-NEXT: shrq $51, %rdx +; AVX-NEXT: orl %eax, %edx +; AVX-NEXT: movq %rcx, %rax +; AVX-NEXT: shrq $17, %rax +; AVX-NEXT: andl $131071, %eax # imm = 0x1FFFF +; AVX-NEXT: movl %ecx, %esi +; AVX-NEXT: andl $131071, %esi # imm = 0x1FFFF +; AVX-NEXT: vmovd %esi, %xmm0 +; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: shrq $34, %rcx +; AVX-NEXT: andl $131071, %ecx # imm = 0x1FFFF +; AVX-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 +; AVX-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0 +; AVX-NEXT: retq + %a = load <4 x i17>, <4 x i17>* %ptr + %b = zext <4 x i17> %a to <4 x i32> + ret <4 x i32> %b +} |

