| field | value | date |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-09-13 17:34:56 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-09-13 17:34:56 +0000 |
| commit | 1f9ddf48a6a000eb382a2e452baccc8e112ef619 (patch) | |
| tree | 148115a87e44c85bf035d9f16759124b807a4579 /llvm | |
| parent | e6be26c8d4106e8366804ef918ff6b366a69eb15 (diff) | |
| download | bcm5719-llvm-1f9ddf48a6a000eb382a2e452baccc8e112ef619.tar.gz bcm5719-llvm-1f9ddf48a6a000eb382a2e452baccc8e112ef619.zip | |
[X86][SSE] Added AVX512F and additional vector truncate test cases
trunc16i16_16i8 is currently commented out due to PR25684
llvm-svn: 281356
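For context, the change replaces the single AVX512BW-specific check prefix with a shared AVX512 prefix plus per-feature AVX512F/AVX512BW prefixes, so cases that lower identically under both feature sets keep one block of checks. Below is a minimal sketch of that pattern, using the RUN lines and the new trunc16i32_16i16 case from the updated test; the check bodies in the real file are the full autogenerated instruction sequences rather than this abbreviated form.

```llvm
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512f  | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

; Both AVX512F and AVX512BW lower this truncating store to a single
; vpmovdw, so the shared AVX512 prefix covers both configurations.
define void @trunc16i32_16i16(<16 x i32> %a) {
; AVX512-LABEL: trunc16i32_16i16:
; AVX512:       vpmovdw %zmm0, (%rax)
entry:
  %0 = trunc <16 x i32> %a to <16 x i16>
  store <16 x i16> %0, <16 x i16>* undef, align 4
  ret void
}
```

Cases where the two feature sets differ, such as trunc32i16_32i8 (only AVX512BW has vpmovwb), use the feature-specific AVX512F/AVX512BW prefixes instead.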
Diffstat (limited to 'llvm')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc.ll | 361 |

1 file changed, 273 insertions(+), 88 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 2df97f23e89..cd71e65eafa 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -4,7 +4,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW

 define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
 ; SSE2-LABEL: trunc8i64_8i32:
@@ -59,10 +60,10 @@ define <8 x i32> @trunc8i64_8i32(<8 x i64> %a) {
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc8i64_8i32:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc8i64_8i32:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <8 x i64> %a to <8 x i32>
 ret <8 x i32> %0
@@ -147,10 +148,10 @@ define <8 x i16> @trunc8i64_8i16(<8 x i64> %a) {
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc8i64_8i16:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc8i64_8i16:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <8 x i64> %a to <8 x i16>
 ret <8 x i16> %0
@@ -202,10 +203,10 @@ define void @trunc8i64_8i8(<8 x i64> %a) {
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc8i64_8i8:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpmovqb %zmm0, (%rax)
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc8i64_8i8:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpmovqb %zmm0, (%rax)
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <8 x i64> %a to <8 x i8>
 store <8 x i8> %0, <8 x i8>* undef, align 4
@@ -256,12 +257,12 @@ define <8 x i16> @trunc8i32_8i16(<8 x i32> %a) {
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc8i32_8i16:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc8i32_8i16:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <8 x i32> %a to <8 x i16>
 ret <8 x i16> %0
@@ -316,19 +317,103 @@ define void @trunc8i32_8i8(<8 x i32> %a) {
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc8i32_8i8:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: vmovq %xmm0, (%rax)
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc8i32_8i8:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vmovq %xmm0, (%rax)
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <8 x i32> %a to <8 x i8>
 store <8 x i8> %0, <8 x i8>* undef, align 4
 ret void
 }

+define void @trunc16i32_16i16(<16 x i32> %a) {
+; SSE2-LABEL: trunc16i32_16i16:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: pslld $16, %xmm1
+; SSE2-NEXT: psrad $16, %xmm1
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: packssdw %xmm1, %xmm0
+; SSE2-NEXT: pslld $16, %xmm3
+; SSE2-NEXT: psrad $16, %xmm3
+; SSE2-NEXT: pslld $16, %xmm2
+; SSE2-NEXT: psrad $16, %xmm2
+; SSE2-NEXT: packssdw %xmm3, %xmm2
+; SSE2-NEXT: movdqu %xmm2, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc16i32_16i16:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: pslld $16, %xmm1
+; SSSE3-NEXT: psrad $16, %xmm1
+; SSSE3-NEXT: pslld $16, %xmm0
+; SSSE3-NEXT: psrad $16, %xmm0
+; SSSE3-NEXT: packssdw %xmm1, %xmm0
+; SSSE3-NEXT: pslld $16, %xmm3
+; SSSE3-NEXT: psrad $16, %xmm3
+; SSSE3-NEXT: pslld $16, %xmm2
+; SSSE3-NEXT: psrad $16, %xmm2
+; SSSE3-NEXT: packssdw %xmm3, %xmm2
+; SSSE3-NEXT: movdqu %xmm2, (%rax)
+; SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc16i32_16i16:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: pxor %xmm4, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7]
+; SSE41-NEXT: packusdw %xmm1, %xmm0
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7]
+; SSE41-NEXT: packusdw %xmm3, %xmm2
+; SSE41-NEXT: movdqu %xmm2, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc16i32_16i16:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1],xmm1[2],xmm3[3],xmm1[4],xmm3[5],xmm1[6],xmm3[7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1],xmm0[2],xmm3[3],xmm0[4],xmm3[5],xmm0[6],xmm3[7]
+; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc16i32_16i16:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128,0,1,4,5,8,9,12,13,128,128,128,128,128,128,128,128]
+; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
+; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: trunc16i32_16i16:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpmovdw %zmm0, (%rax)
+; AVX512-NEXT: retq
+entry:
+ %0 = trunc <16 x i32> %a to <16 x i16>
+ store <16 x i16> %0, <16 x i16>* undef, align 4
+ ret void
+}
+
 define void @trunc16i32_16i8(<16 x i32> %a) {
 ; SSE-LABEL: trunc16i32_16i8:
 ; SSE: # BB#0: # %entry
@@ -374,16 +459,116 @@ define void @trunc16i32_16i8(<16 x i32> %a) {
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc16i32_16i8:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpmovdb %zmm0, (%rax)
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc16i32_16i8:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpmovdb %zmm0, (%rax)
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <16 x i32> %a to <16 x i8>
 store <16 x i8> %0, <16 x i8>* undef, align 4
 ret void
 }

+;PR25684
+;define void @trunc16i16_16i8(<16 x i16> %a) {
+;entry:
+; %0 = trunc <16 x i16> %a to <16 x i8>
+; store <16 x i8> %0, <16 x i8>* undef, align 4
+; ret void
+;}
+
+define void @trunc32i16_32i8(<32 x i16> %a) {
+; SSE2-LABEL: trunc32i16_32i8:
+; SSE2: # BB#0: # %entry
+; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT: pand %xmm4, %xmm1
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm3
+; SSE2-NEXT: pand %xmm4, %xmm2
+; SSE2-NEXT: packuswb %xmm3, %xmm2
+; SSE2-NEXT: movdqu %xmm2, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: retq
+;
+; SSSE3-LABEL: trunc32i16_32i8:
+; SSSE3: # BB#0: # %entry
+; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSSE3-NEXT: pshufb %xmm4, %xmm1
+; SSSE3-NEXT: pshufb %xmm4, %xmm0
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: pshufb %xmm4, %xmm3
+; SSSE3-NEXT: pshufb %xmm4, %xmm2
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSSE3-NEXT: movdqu %xmm2, (%rax)
+; SSSE3-NEXT: movdqu %xmm0, (%rax)
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: trunc32i16_32i8:
+; SSE41: # BB#0: # %entry
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; SSE41-NEXT: pshufb %xmm4, %xmm1
+; SSE41-NEXT: pshufb %xmm4, %xmm0
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT: pshufb %xmm4, %xmm3
+; SSE41-NEXT: pshufb %xmm4, %xmm2
+; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE41-NEXT: movdqu %xmm2, (%rax)
+; SSE41-NEXT: movdqu %xmm0, (%rax)
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: trunc32i16_32i8:
+; AVX1: # BB#0: # %entry
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vmovups %ymm0, (%rax)
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: trunc32i16_32i8:
+; AVX2: # BB#0: # %entry
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpshufb %xmm3, %xmm1, %xmm1
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: trunc32i16_32i8:
+; AVX512F: # BB#0: # %entry
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: trunc32i16_32i8:
+; AVX512BW: # BB#0: # %entry
+; AVX512BW-NEXT: vpmovwb %zmm0, (%rax)
+; AVX512BW-NEXT: retq
+entry:
+ %0 = trunc <32 x i16> %a to <32 x i8>
+ store <32 x i8> %0, <32 x i8>* undef, align 4
+ ret void
+}
+
 define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
 ; SSE2-LABEL: trunc2x4i64_8i32:
 ; SSE2: # BB#0: # %entry
@@ -437,14 +622,14 @@ define <8 x i32> @trunc2x4i64_8i32(<4 x i64> %a, <4 x i64> %b) {
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc2x4i64_8i32:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc2x4i64_8i32:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <4 x i64> %a to <4 x i32>
 %1 = trunc <4 x i64> %b to <4 x i32>
@@ -539,17 +724,17 @@ define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc2x4i64_8i16:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
-; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc2x4i64_8i16:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <4 x i64> %a to <4 x i16>
 %1 = trunc <4 x i64> %b to <4 x i16>
@@ -593,12 +778,12 @@ define <4 x i32> @trunc2x2i64_4i32(<2 x i64> %a, <2 x i64> %b) {
 ; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 ; AVX2-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc2x2i64_4i32:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc2x2i64_4i32:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,2]
+; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <2 x i64> %a to <2 x i32>
 %1 = trunc <2 x i64> %b to <2 x i32>
@@ -619,11 +804,11 @@ define i64 @trunc2i64_i64(<2 x i64> %inval) {
 ; AVX-NEXT: vmovq %xmm0, %rax
 ; AVX-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc2i64_i64:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc2i64_i64:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <2 x i64> %inval to <2 x i32>
 %1 = bitcast <2 x i32> %0 to i64
@@ -666,13 +851,13 @@ define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc2x4i32_8i16:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc2x4i32_8i16:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <4 x i32> %a to <4 x i16>
 %1 = trunc <4 x i32> %b to <4 x i16>
@@ -708,11 +893,11 @@ define i64 @trunc4i32_i64(<4 x i32> %inval) {
 ; AVX-NEXT: vmovq %xmm0, %rax
 ; AVX-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc4i32_i64:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc4i32_i64:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <4 x i32> %inval to <4 x i16>
 %1 = bitcast <4 x i16> %0 to i64
@@ -752,13 +937,13 @@ define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
 ; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc2x8i16_16i8:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc2x8i16_16i8:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <8 x i16> %a to <8 x i8>
 %1 = trunc <8 x i16> %b to <8 x i8>
@@ -793,11 +978,11 @@ define i64 @trunc8i16_i64(<8 x i16> %inval) {
 ; AVX-NEXT: vmovq %xmm0, %rax
 ; AVX-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc8i16_i64:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: vmovq %xmm0, %rax
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc8i16_i64:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vmovq %xmm0, %rax
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <8 x i16> %inval to <8 x i8>
 %1 = bitcast <8 x i8> %0 to i64
@@ -815,10 +1000,10 @@ define <16 x i8> @trunc16i64_16i8_const() {
 ; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
-; AVX512BW-LABEL: trunc16i64_16i8_const:
-; AVX512BW: # BB#0: # %entry
-; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: trunc16i64_16i8_const:
+; AVX512: # BB#0: # %entry
+; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: retq
 entry:
 %0 = trunc <16 x i64> zeroinitializer to <16 x i8>

