-rw-r--r--  llvm/test/CodeGen/X86/horizontal-reduce-smax.ll | 438
-rw-r--r--  llvm/test/CodeGen/X86/horizontal-reduce-smin.ll | 438
-rw-r--r--  llvm/test/CodeGen/X86/horizontal-reduce-umax.ll | 418
-rw-r--r--  llvm/test/CodeGen/X86/horizontal-reduce-umin.ll | 394
4 files changed, 1688 insertions, 0 deletions
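The new tests below cover "partial" reductions: every shuffle mask reads only the low 128-bit subvector, so codegen can ignore the upper ymm/zmm lanes and reduce entirely in xmm registers. The SSE4.2 paths all funnel into PHMINPOSUW, which computes an unsigned i16 horizontal minimum, by XOR-remapping lane values first: x ^ 0x8000 turns signed order into unsigned order (smin), x ^ 0x7FFF reverses signed order into unsigned order (smax), and ~x reverses unsigned order (umax); the same XOR on the scalar result undoes the mapping, which is exactly what the xorl $32768 / xorl $32767 / notl instructions in the CHECK lines do. A minimal scalar model of those three identities (illustrative C only, not the vector code under test):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* stands in for PHMINPOSUW's unsigned horizontal minimum */
static uint16_t umin16(const uint16_t *v, int n) {
    uint16_t m = v[0];
    for (int i = 1; i < n; i++)
        if (v[i] < m) m = v[i];
    return m;
}

int main(void) {
    int16_t in[8] = {-32768, -5, 0, 7, 32767, -1, 100, -100};
    uint16_t t[8];

    /* smin: x ^ 0x8000 biases signed order into unsigned order */
    for (int i = 0; i < 8; i++) t[i] = (uint16_t)in[i] ^ 0x8000;
    int16_t smin = (int16_t)(umin16(t, 8) ^ 0x8000);

    /* smax: x ^ 0x7FFF reverses signed order into unsigned order */
    for (int i = 0; i < 8; i++) t[i] = (uint16_t)in[i] ^ 0x7FFF;
    int16_t smax = (int16_t)(umin16(t, 8) ^ 0x7FFF);

    /* umax: ~x reverses unsigned order */
    for (int i = 0; i < 8; i++) t[i] = (uint16_t)~in[i];
    uint16_t umax = (uint16_t)~umin16(t, 8);

    printf("smin=%d smax=%d umax=%u\n", smin, smax, (unsigned)umax);
    assert(smin == -32768 && smax == 32767 && umax == 65535);
    return 0;
}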
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll index c08f08f383b..32fee3a2e39 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll @@ -1829,3 +1829,441 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { %19 = extractelement <64 x i8> %18, i32 0 ret i8 %19 } + +; +; Partial Vector Reductions +; + +define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { +; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pxor LCPI12_0, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF +; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v16i16_v8i16: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF +; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v16i16_v8i16: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp sgt <16 x i16> %a0, %1 + %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 + %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp sgt <16 x i16> %3, %4 + %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp sgt <16 x i16> %6, %7 + %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 + %10 = extractelement <16 x i16> %9, i32 0 + ret i16 %10 +} + +define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { +; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pxor LCPI13_0, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF +; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v32i16_v8i16: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF +; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v32i16_v8i16: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp sgt <32 x i16> %a0, %1 + %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 + %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp sgt <32 x i16> %3, %4 + %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 + %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp sgt <32 x i16> %6, %7 + %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 + %10 = extractelement <32 x i16> %9, i32 0 + ret i16 %10 +} + +define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { +; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pandn %xmm1, %xmm2 +; X86-SSE2-NEXT: por %xmm0, %xmm2 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; X86-SSE2-NEXT: pand %xmm1, %xmm2 +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: por %xmm2, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: psrld $16, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 +; X86-SSE2-NEXT: pand %xmm2, %xmm1 +; X86-SSE2-NEXT: pandn %xmm0, %xmm2 +; X86-SSE2-NEXT: por %xmm1, %xmm2 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 +; X86-SSE2-NEXT: psrlw $8, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; X86-SSE2-NEXT: pand %xmm1, %xmm2 +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: por %xmm2, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pxor LCPI14_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $127, %al +; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v32i8_v16i8: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: 
test_reduce_v32i8_v16i8: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pandn %xmm1, %xmm2 +; X64-SSE2-NEXT: por %xmm0, %xmm2 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 +; X64-SSE2-NEXT: por %xmm2, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: psrld $16, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 +; X64-SSE2-NEXT: pand %xmm2, %xmm1 +; X64-SSE2-NEXT: pandn %xmm0, %xmm2 +; X64-SSE2-NEXT: por %xmm1, %xmm2 +; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 +; X64-SSE2-NEXT: psrlw $8, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 +; X64-SSE2-NEXT: por %xmm2, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $127, %al +; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v32i8_v16i8: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp sgt <32 x i8> %a0, %1 + %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 + %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp sgt <32 x i8> %3, %4 + %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef> + %8 = icmp sgt <32 x i8> %6, %7 + %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 + %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %11 = icmp sgt <32 x i8> %9, %10 + %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 + %13 = extractelement <32 x i8> %12, i32 0 + ret i8 %13 +} + +define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) { +; X86-SSE2-LABEL: test_reduce_v64i8_v16i8: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pandn %xmm1, %xmm2 +; X86-SSE2-NEXT: por %xmm0, %xmm2 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; X86-SSE2-NEXT: pand %xmm1, %xmm2 +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: por %xmm2, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: psrld $16, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 +; X86-SSE2-NEXT: pand %xmm2, %xmm1 +; X86-SSE2-NEXT: pandn %xmm0, %xmm2 +; X86-SSE2-NEXT: por %xmm1, %xmm2 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 +; X86-SSE2-NEXT: psrlw $8, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; X86-SSE2-NEXT: pand %xmm1, %xmm2 +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: por %xmm2, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v64i8_v16i8: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pxor LCPI15_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $127, %al +; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v64i8_v16i8: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v64i8_v16i8: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pandn %xmm1, %xmm2 +; X64-SSE2-NEXT: por %xmm0, %xmm2 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 +; X64-SSE2-NEXT: por %xmm2, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: psrld $16, %xmm0 +; X64-SSE2-NEXT: 
movdqa %xmm1, %xmm2 +; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 +; X64-SSE2-NEXT: pand %xmm2, %xmm1 +; X64-SSE2-NEXT: pandn %xmm0, %xmm2 +; X64-SSE2-NEXT: por %xmm1, %xmm2 +; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 +; X64-SSE2-NEXT: psrlw $8, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm2, %xmm1 +; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 +; X64-SSE2-NEXT: por %xmm2, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v64i8_v16i8: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $127, %al +; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v64i8_v16i8: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp sgt <64 x i8> %a0, %1 + %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 + %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp sgt <64 x i8> %3, %4 + %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 + %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp sgt <64 x i8> %6, %7 + %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 + %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %11 = icmp sgt <64 x i8> %9, %10 + %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 + %13 = extractelement <64 x i8> %12, i32 0 + ret i8 %13 +} diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll index c526cb8f99c..a3443223782 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll @@ -1833,3 +1833,441 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { %19 = extractelement <64 x i8> %18, i32 0 ret i8 %19 } + +; +; Partial Vector Reductions +; + +define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { +; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pxor LCPI12_0, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v16i16_v8i16: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrld $16, 
%xmm1 +; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v16i16_v8i16: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp slt <16 x i16> %a0, %1 + %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 + %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp slt <16 x i16> %3, %4 + %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp slt <16 x i16> %6, %7 + %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 + %10 = extractelement <16 x i16> %9, i32 0 + ret i16 %10 +} + +define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { +; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pxor LCPI13_0, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v32i16_v8i16: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v32i16_v8i16: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp slt <32 x i16> %a0, %1 + %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 + %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp slt <32 x i16> %3, %4 + %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 + %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp slt <32 x i16> %6, %7 + %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 + %10 = extractelement <32 x i16> %9, i32 0 + ret i16 %10 +} + +define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { +; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pandn %xmm1, %xmm2 +; X86-SSE2-NEXT: por %xmm0, %xmm2 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 +; X86-SSE2-NEXT: pand %xmm1, %xmm2 +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: por %xmm2, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: psrld $16, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; X86-SSE2-NEXT: pand %xmm2, %xmm1 +; X86-SSE2-NEXT: pandn %xmm0, %xmm2 +; 
X86-SSE2-NEXT: por %xmm1, %xmm2 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 +; X86-SSE2-NEXT: psrlw $8, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 +; X86-SSE2-NEXT: pand %xmm1, %xmm2 +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: por %xmm2, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pxor LCPI14_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $-128, %al +; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v32i8_v16i8: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v32i8_v16i8: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pandn %xmm1, %xmm2 +; X64-SSE2-NEXT: por %xmm0, %xmm2 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 +; X64-SSE2-NEXT: por %xmm2, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: psrld $16, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; X64-SSE2-NEXT: pand %xmm2, %xmm1 +; X64-SSE2-NEXT: pandn %xmm0, %xmm2 +; X64-SSE2-NEXT: por %xmm1, %xmm2 +; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 +; X64-SSE2-NEXT: psrlw $8, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 +; X64-SSE2-NEXT: por %xmm2, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $-128, %al +; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v32i8_v16i8: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $al 
killed $al killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp slt <32 x i8> %a0, %1 + %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 + %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp slt <32 x i8> %3, %4 + %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp slt <32 x i8> %6, %7 + %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 + %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %11 = icmp slt <32 x i8> %9, %10 + %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 + %13 = extractelement <32 x i8> %12, i32 0 + ret i8 %13 +} + +define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) { +; X86-SSE2-LABEL: test_reduce_v64i8_v16i8: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 +; X86-SSE2-NEXT: pand %xmm2, %xmm0 +; X86-SSE2-NEXT: pandn %xmm1, %xmm2 +; X86-SSE2-NEXT: por %xmm0, %xmm2 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 +; X86-SSE2-NEXT: pand %xmm1, %xmm2 +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: por %xmm2, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: psrld $16, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; X86-SSE2-NEXT: pand %xmm2, %xmm1 +; X86-SSE2-NEXT: pandn %xmm0, %xmm2 +; X86-SSE2-NEXT: por %xmm1, %xmm2 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 +; X86-SSE2-NEXT: psrlw $8, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 +; X86-SSE2-NEXT: pand %xmm1, %xmm2 +; X86-SSE2-NEXT: pandn %xmm0, %xmm1 +; X86-SSE2-NEXT: por %xmm2, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v64i8_v16i8: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pxor LCPI15_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub 
%xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $-128, %al +; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v64i8_v16i8: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v64i8_v16i8: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movdqa %xmm1, %xmm2 +; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2 +; X64-SSE2-NEXT: pand %xmm2, %xmm0 +; X64-SSE2-NEXT: pandn %xmm1, %xmm2 +; X64-SSE2-NEXT: por %xmm0, %xmm2 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 +; X64-SSE2-NEXT: por %xmm2, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: psrld $16, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2 +; X64-SSE2-NEXT: pand %xmm2, %xmm1 +; X64-SSE2-NEXT: pandn %xmm0, %xmm2 +; X64-SSE2-NEXT: por %xmm1, %xmm2 +; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 +; X64-SSE2-NEXT: psrlw $8, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1 +; X64-SSE2-NEXT: pand %xmm1, %xmm2 +; X64-SSE2-NEXT: pandn %xmm0, %xmm1 +; X64-SSE2-NEXT: por %xmm2, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v64i8_v16i8: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $-128, %al +; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v64i8_v16i8: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp slt <64 x i8> %a0, %1 + %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 + %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp slt <64 x i8> %3, %4 + %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 + %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp slt <64 x i8> %6, %7 + %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 + %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %11 = icmp slt <64 x i8> %9, %10 + %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 + %13 = extractelement <64 x i8> %12, i32 0 + ret i8 %13 +} diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll index 99038d7f3c1..05cd44d3885 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll @@ -2021,3 +2021,421 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { %19 = extractelement <64 x i8> %18, i32 0 ret i8 %19 } + +; +; Partial Vector Reductions +; + +define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { +; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = 
[32768,32768,32768,32768,32768,32768,32768,32768] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: notl %eax +; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v16i16_v8i16: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: notl %eax +; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v16i16_v8i16: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp ugt <16 x i16> %a0, %1 + %3 = select <16 x 
i1> %2, <16 x i16> %a0, <16 x i16> %1 + %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp ugt <16 x i16> %3, %4 + %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp ugt <16 x i16> %6, %7 + %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 + %10 = extractelement <16 x i16> %9, i32 0 + ret i16 %10 +} + +define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { +; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: notl %eax +; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v32i16_v8i16: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-SSE42-NEXT: pxor %xmm0, 
%xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: notl %eax +; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v32i16_v8i16: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp ugt <32 x i16> %a0, %1 + %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 + %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp ugt <32 x i16> %3, %4 + %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 + %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp ugt <32 x i16> %6, %7 + %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 + %10 = extractelement <32 x i16> %9, i32 0 + ret i16 %10 +} + +define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { +; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: psrlw $8, %xmm0 +; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE42-NEXT: psrlw $8, %xmm0 +; X86-SSE42-NEXT: pminub %xmm1, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: notb %al +; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v32i8_v16i8: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd 
{{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v32i8_v16i8: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: psrlw $8, %xmm0 +; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 +; X64-SSE2-NEXT: movd %xmm0, %eax +; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE42-NEXT: psrlw $8, %xmm0 +; X64-SSE42-NEXT: pminub %xmm1, %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: notb %al +; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v32i8_v16i8: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp ugt <32 x i8> %a0, %1 + %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 + %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp ugt <32 x i8> %3, %4 + %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp ugt <32 x i8> %6, %7 + %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 + %10 = shufflevector <32 x i8> %9, <32 x i8> undef, 
<32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %11 = icmp ugt <32 x i8> %9, %10 + %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 + %13 = extractelement <32 x i8> %12, i32 0 + ret i8 %13 +} + +define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) { +; X86-SSE2-LABEL: test_reduce_v64i8_v16i8: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pmaxub %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: psrlw $8, %xmm0 +; X86-SSE2-NEXT: pmaxub %xmm1, %xmm0 +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v64i8_v16i8: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE42-NEXT: psrlw $8, %xmm0 +; X86-SSE42-NEXT: pminub %xmm1, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: notb %al +; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v64i8_v16i8: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v64i8_v16i8: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pmaxub %xmm0, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: psrlw $8, %xmm0 +; X64-SSE2-NEXT: pmaxub %xmm1, %xmm0 +; X64-SSE2-NEXT: movd %xmm0, %eax +; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v64i8_v16i8: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE42-NEXT: psrlw $8, %xmm0 +; X64-SSE42-NEXT: pminub %xmm1, %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: notb %al +; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v64i8_v16i8: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, 
%xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp ugt <64 x i8> %a0, %1 + %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 + %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp ugt <64 x i8> %3, %4 + %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 + %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp ugt <64 x i8> %6, %7 + %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 + %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef> + %11 = icmp ugt <64 x i8> %9, %10 + %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 + %13 = extractelement <64 x i8> %12, i32 0 + ret i8 %13 +} diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll index 5e4d83046d7..ed64ec2eb27 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll @@ -1885,3 +1885,397 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) { %19 = extractelement <64 x i8> %18, i32 0 ret i8 %19 } + +; +; Partial Vector Reductions +; + +define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) { +; X86-SSE2-LABEL: test_reduce_v16i16_v8i16: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v16i16_v8i16: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v16i16_v8i16: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v16i16_v8i16: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pminsw %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v16i16_v8i16: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v16i16_v8i16: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 
+; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp ult <16 x i16> %a0, %1 + %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1 + %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp ult <16 x i16> %3, %4 + %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4 + %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp ult <16 x i16> %6, %7 + %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7 + %10 = extractelement <16 x i16> %9, i32 0 + ret i16 %10 +} + +define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) { +; X86-SSE2-LABEL: test_reduce_v32i16_v8i16: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X86-SSE2-NEXT: pxor %xmm2, %xmm0 +; X86-SSE2-NEXT: pminsw %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pxor %xmm2, %xmm1 +; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v32i16_v8i16: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v32i16_v8i16: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v32i16_v8i16: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; X64-SSE2-NEXT: pxor %xmm2, %xmm0 +; X64-SSE2-NEXT: pminsw 
%xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pxor %xmm2, %xmm1 +; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 +; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 +; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v32i16_v8i16: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 +; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v32i16_v8i16: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp ult <32 x i16> %a0, %1 + %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1 + %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp ult <32 x i16> %3, %4 + %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4 + %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp ult <32 x i16> %6, %7 + %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7 + %10 = extractelement <32 x i16> %9, i32 0 + ret i16 %10 +} + +define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) { +; X86-SSE2-LABEL: test_reduce_v32i8_v16i8: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: pminub %xmm0, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X86-SSE2-NEXT: pminub %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pminub %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: psrlw $8, %xmm0 +; X86-SSE2-NEXT: pminub %xmm1, %xmm0 +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v32i8_v16i8: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X86-SSE42-NEXT: 
pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v32i8_v16i8: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v32i8_v16i8: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: pminub %xmm0, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-SSE2-NEXT: pminub %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pminub %xmm0, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: psrlw $8, %xmm0 +; X64-SSE2-NEXT: pminub %xmm1, %xmm0 +; X64-SSE2-NEXT: movd %xmm0, %eax +; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v32i8_v16i8: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v32i8_v16i8: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp ult <32 x i8> %a0, %1 + %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1 + %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp ult <32 x i8> %3, %4 + %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4 + %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, 
i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp ult <32 x i8> %6, %7 + %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7 + %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %11 = icmp ult <32 x i8> %9, %10 + %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10 + %13 = extractelement <32 x i8> %12, i32 0 + ret i8 %13 +} + +define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) { +; X86-SSE2-LABEL: test_reduce_v64i8_v16i8: +; X86-SSE2: ## %bb.0: +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-SSE2-NEXT: pminub %xmm0, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X86-SSE2-NEXT: pminub %xmm1, %xmm0 +; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE2-NEXT: psrld $16, %xmm1 +; X86-SSE2-NEXT: pminub %xmm0, %xmm1 +; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE2-NEXT: psrlw $8, %xmm0 +; X86-SSE2-NEXT: pminub %xmm1, %xmm0 +; X86-SSE2-NEXT: movd %xmm0, %eax +; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE2-NEXT: retl +; +; X86-SSE42-LABEL: test_reduce_v64i8_v16i8: +; X86-SSE42: ## %bb.0: +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X86-SSE42-NEXT: retl +; +; X86-AVX-LABEL: test_reduce_v64i8_v16i8: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl +; +; X64-SSE2-LABEL: test_reduce_v64i8_v16i8: +; X64-SSE2: ## %bb.0: +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-SSE2-NEXT: pminub %xmm0, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; X64-SSE2-NEXT: pminub %xmm1, %xmm0 +; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE2-NEXT: psrld $16, %xmm1 +; X64-SSE2-NEXT: pminub %xmm0, %xmm1 +; X64-SSE2-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE2-NEXT: psrlw $8, %xmm0 +; X64-SSE2-NEXT: pminub %xmm1, %xmm0 +; X64-SSE2-NEXT: movd %xmm0, %eax +; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE2-NEXT: retq +; +; X64-SSE42-LABEL: test_reduce_v64i8_v16i8: +; X64-SSE42: ## %bb.0: +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 +; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax +; X64-SSE42-NEXT: retq +; +; X64-AVX-LABEL: test_reduce_v64i8_v16i8: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrld $16, %xmm0, %xmm1 +; 
X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax +; X64-AVX-NEXT: vzeroupper +; X64-AVX-NEXT: retq + %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %2 = icmp ult <64 x i8> %a0, %1 + %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1 + %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %5 = icmp ult <64 x i8> %3, %4 + %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4 + %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %8 = icmp ult <64 x i8> %6, %7 + %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7 + %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef> + %11 = icmp ult <64 x i8> %9, %10 + %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10 + %13 = extractelement <64 x i8> %12, i32 0 + ret i8 %13 +}
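A note on the SSE42 check lines in the umax tests above: x86 has no horizontal unsigned-max instruction, only `phminposuw` (horizontal unsigned i16 *minimum*), so the lowering inverts the input (`pcmpeqd` to build all-ones, then `pxor`), takes the minimum, and re-inverts the scalar result with `notl`/`notb`, using the identity `max_u(x) == ~min_u(~x)`. Below is a minimal C sketch of the i16 case, assuming SSE4.1 intrinsics; `hmax_epu16` and the sample values are illustrative, not taken from the tests.

```c
/* build: cc -msse4.1 hmax.c */
#include <smmintrin.h> /* SSE4.1: _mm_minpos_epu16 (phminposuw) */
#include <stdint.h>
#include <stdio.h>

/* max_u(x) = ~min_u(~x): invert every lane, take the horizontal
   unsigned minimum (phminposuw leaves it in lane 0), invert back. */
static uint16_t hmax_epu16(__m128i v)
{
    __m128i ones = _mm_cmpeq_epi32(v, v);       /* all-ones, like pcmpeqd */
    __m128i inv  = _mm_xor_si128(v, ones);      /* ~x in every lane */
    __m128i mp   = _mm_minpos_epu16(inv);       /* min value in lane 0 */
    return (uint16_t)~_mm_extract_epi16(mp, 0); /* undo the inversion */
}

int main(void)
{
    __m128i v = _mm_setr_epi16(3, 9, 0x7fff, (short)40000, 2, 5, 6, 7);
    printf("%u\n", hmax_epu16(v)); /* prints 40000 */
    return 0;
}
```

There is no byte-wide `phminposuw`, which is why the v32i8/v64i8 SSE42 checks first fold the high byte of each word in with `psrlw $8` + `pminub` before applying the same word-minimum trick, then extract with `pextrb`/`notb`.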

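The SSE2 check lines in the umin tests rely on a different identity: SSE2 has `pminsw` (signed i16 min) but no unsigned word minimum, so both operands are XORed with a splat of 32768 to map unsigned order onto signed order, and the trailing `xorl $32768, %eax` strips the bias from the scalar result. A short sketch of a single min step, again with illustrative names, assuming only SSE2 intrinsics:

```c
#include <emmintrin.h> /* SSE2 */
#include <stdint.h>
#include <stdio.h>

/* min_u(a, b) = min_s(a ^ 0x8000, b ^ 0x8000) ^ 0x8000:
   flipping the sign bit turns unsigned order into signed order. */
static __m128i min_epu16_sse2(__m128i a, __m128i b)
{
    const __m128i bias = _mm_set1_epi16((short)0x8000);
    __m128i m = _mm_min_epi16(_mm_xor_si128(a, bias),   /* pminsw */
                              _mm_xor_si128(b, bias));
    return _mm_xor_si128(m, bias);                      /* undo the bias */
}

int main(void)
{
    __m128i a = _mm_setr_epi16((short)50000, 3, 9, 9, 9, 9, 9, 9);
    __m128i b = _mm_setr_epi16(1, (short)40000, 9, 9, 9, 9, 9, 9);
    __m128i m = min_epu16_sse2(a, b);
    /* lane 0: unsigned min is 1; plain pminsw would pick 50000,
       which is negative when read as i16 */
    printf("%u %u\n",
           (uint16_t)_mm_extract_epi16(m, 0),  /* 1 */
           (uint16_t)_mm_extract_epi16(m, 1)); /* 3 */
    return 0;
}
```

The repeated `pxor`/`pminsw`/`pxor` pattern in the SSE2 checks is this step applied at each halving of the vector; the AVX paths avoid it entirely because SSE4.1's `pminuw` (`vpminuw`) provides the unsigned minimum directly.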
