Diffstat (limited to 'llvm/test/CodeGen/X86/vector-trunc-math.ll')
-rw-r--r--    llvm/test/CodeGen/X86/vector-trunc-math.ll    126
1 files changed, 126 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll
index 41a9aabd7a7..ab34ad6a613 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-math.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -42,6 +42,7 @@ define <4 x i32> @trunc_add_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = add <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -109,6 +110,7 @@ define <8 x i16> @trunc_add_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = add <8 x i64> %a0, %a1
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -154,6 +156,7 @@ define <8 x i16> @trunc_add_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = add <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -262,6 +265,7 @@ define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_add_v16i64_v16i8:
@@ -272,6 +276,7 @@ define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_add_v16i64_v16i8:
@@ -282,6 +287,7 @@ define <16 x i8> @trunc_add_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = add <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -346,6 +352,7 @@ define <16 x i8> @trunc_add_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX512: # BB#0:
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = add <16 x i32> %a0, %a1
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -392,6 +399,7 @@ define <16 x i8> @trunc_add_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512F-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_add_v16i16_v16i8:
@@ -399,6 +407,7 @@ define <16 x i8> @trunc_add_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_add_v16i16_v16i8:
@@ -406,6 +415,7 @@ define <16 x i8> @trunc_add_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512DQ-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = add <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -452,6 +462,7 @@ define <8 x i16> @trunc_add_v8i32_v8i16_sext_8i8(<16 x i8> %a0, <8 x i32> %a1) {
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
; AVX512-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = sext <8 x i8> %1 to <8 x i32>
@@ -492,6 +503,7 @@ define <4 x i32> @trunc_add_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = add <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -549,6 +561,7 @@ define <8 x i16> @trunc_add_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = add <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -590,6 +603,7 @@ define <8 x i16> @trunc_add_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = add <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -676,6 +690,7 @@ define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_add_const_v16i64_v16i8:
@@ -685,6 +700,7 @@ define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_add_const_v16i64_v16i8:
@@ -694,6 +710,7 @@ define <16 x i8> @trunc_add_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = add <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -749,6 +766,7 @@ define <16 x i8> @trunc_add_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = add <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -792,6 +810,7 @@ define <16 x i8> @trunc_add_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_add_const_v16i16_v16i8:
@@ -799,6 +818,7 @@ define <16 x i8> @trunc_add_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_add_const_v16i16_v16i8:
@@ -806,6 +826,7 @@ define <16 x i8> @trunc_add_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = add <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -848,6 +869,7 @@ define <4 x i32> @trunc_sub_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512-NEXT: vpsubq %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -915,6 +937,7 @@ define <8 x i16> @trunc_sub_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpsubq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <8 x i64> %a0, %a1
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -960,6 +983,7 @@ define <8 x i16> @trunc_sub_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -1068,6 +1092,7 @@ define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_sub_v16i64_v16i8:
@@ -1078,6 +1103,7 @@ define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_sub_v16i64_v16i8:
@@ -1088,6 +1114,7 @@ define <16 x i8> @trunc_sub_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = sub <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -1152,6 +1179,7 @@ define <16 x i8> @trunc_sub_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX512: # BB#0:
; AVX512-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <16 x i32> %a0, %a1
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -1198,6 +1226,7 @@ define <16 x i8> @trunc_sub_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512F-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_sub_v16i16_v16i8:
@@ -1205,6 +1234,7 @@ define <16 x i8> @trunc_sub_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_sub_v16i16_v16i8:
@@ -1212,6 +1242,7 @@ define <16 x i8> @trunc_sub_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512DQ-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = sub <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -1259,6 +1290,7 @@ define <4 x i32> @trunc_sub_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX512-NEXT: vpsubq {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -1330,6 +1362,7 @@ define <8 x i16> @trunc_sub_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -1374,6 +1407,7 @@ define <8 x i16> @trunc_sub_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; AVX512-NEXT: vpsubd {{.*}}(%rip), %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -1484,6 +1518,7 @@ define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_sub_const_v16i64_v16i8:
@@ -1494,6 +1529,7 @@ define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_sub_const_v16i64_v16i8:
@@ -1504,6 +1540,7 @@ define <16 x i8> @trunc_sub_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = sub <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -1566,6 +1603,7 @@ define <16 x i8> @trunc_sub_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpsubd {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = sub <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -1611,6 +1649,7 @@ define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512F-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_sub_const_v16i16_v16i8:
@@ -1618,6 +1657,7 @@ define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_sub_const_v16i16_v16i8:
@@ -1625,6 +1665,7 @@ define <16 x i8> @trunc_sub_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512DQ-NEXT: vpsubw {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = sub <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -1688,6 +1729,7 @@ define <4 x i32> @trunc_mul_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_mul_v4i64_v4i32:
@@ -1697,6 +1739,7 @@ define <4 x i32> @trunc_mul_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
; AVX512BW-NEXT: vpmulld %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_mul_v4i64_v4i32:
@@ -1706,6 +1749,7 @@ define <4 x i32> @trunc_mul_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = mul <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -1792,6 +1836,7 @@ define <8 x i16> @trunc_mul_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_mul_v8i64_v8i16:
@@ -1799,12 +1844,14 @@ define <8 x i16> @trunc_mul_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1
; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
; AVX512BW-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_mul_v8i64_v8i16:
; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = mul <8 x i64> %a0, %a1
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -1862,6 +1909,7 @@ define <8 x i16> @trunc_mul_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512-NEXT: vpmulld %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = mul <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -2116,6 +2164,7 @@ define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512F-NEXT: vpmulld %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_mul_v16i64_v16i8:
@@ -2128,6 +2177,7 @@ define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512BW-NEXT: vpmulld %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_mul_v16i64_v16i8:
@@ -2138,6 +2188,7 @@ define <16 x i8> @trunc_mul_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = mul <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -2226,6 +2277,7 @@ define <16 x i8> @trunc_mul_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX512: # BB#0:
; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = mul <16 x i32> %a0, %a1
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -2272,6 +2324,7 @@ define <16 x i8> @trunc_mul_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_mul_v16i16_v16i8:
@@ -2279,6 +2332,7 @@ define <16 x i8> @trunc_mul_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_mul_v16i16_v16i8:
@@ -2286,6 +2340,7 @@ define <16 x i8> @trunc_mul_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512DQ-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = mul <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -2332,6 +2387,7 @@ define <8 x i16> @trunc_mul_v8i32_v8i16_zext_8i8(<16 x i8> %a0, <8 x i32> %a1) {
; AVX512-NEXT: vpmovdw %zmm1, %ymm1
; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = zext <8 x i8> %1 to <8 x i32>
@@ -2387,6 +2443,7 @@ define <4 x i32> @trunc_mul_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = mul <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -2444,6 +2501,7 @@ define <8 x i16> @trunc_mul_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = mul <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -2485,6 +2543,7 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: vpmullw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = mul <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -2681,6 +2740,7 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512F-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_mul_const_v16i64_v16i8:
@@ -2691,6 +2751,7 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512BW-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_mul_const_v16i64_v16i8:
@@ -2701,6 +2762,7 @@ define <16 x i8> @trunc_mul_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512DQ-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = mul <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -2791,6 +2853,7 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmulld {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = mul <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -2836,6 +2899,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_mul_const_v16i16_v16i8:
@@ -2843,6 +2907,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_mul_const_v16i16_v16i8:
@@ -2850,6 +2915,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512DQ-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = mul <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -2890,6 +2956,7 @@ define <4 x i32> @trunc_and_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = and <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -2953,6 +3020,7 @@ define <8 x i16> @trunc_and_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = and <8 x i64> %a0, %a1
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -2996,6 +3064,7 @@ define <8 x i16> @trunc_and_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = and <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -3096,6 +3165,7 @@ define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_and_v16i64_v16i8:
@@ -3106,6 +3176,7 @@ define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_and_v16i64_v16i8:
@@ -3116,6 +3187,7 @@ define <16 x i8> @trunc_and_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = and <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -3176,6 +3248,7 @@ define <16 x i8> @trunc_and_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX512: # BB#0:
; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = and <16 x i32> %a0, %a1
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -3220,6 +3293,7 @@ define <16 x i8> @trunc_and_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_and_v16i16_v16i8:
@@ -3227,6 +3301,7 @@ define <16 x i8> @trunc_and_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_and_v16i16_v16i8:
@@ -3234,6 +3309,7 @@ define <16 x i8> @trunc_and_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = and <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -3272,6 +3348,7 @@ define <4 x i32> @trunc_and_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = and <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -3329,6 +3406,7 @@ define <8 x i16> @trunc_and_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = and <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -3370,6 +3448,7 @@ define <8 x i16> @trunc_and_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = and <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -3456,6 +3535,7 @@ define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_and_const_v16i64_v16i8:
@@ -3465,6 +3545,7 @@ define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_and_const_v16i64_v16i8:
@@ -3474,6 +3555,7 @@ define <16 x i8> @trunc_and_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = and <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -3529,6 +3611,7 @@ define <16 x i8> @trunc_and_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = and <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -3572,6 +3655,7 @@ define <16 x i8> @trunc_and_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_and_const_v16i16_v16i8:
@@ -3579,6 +3663,7 @@ define <16 x i8> @trunc_and_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_and_const_v16i16_v16i8:
@@ -3586,6 +3671,7 @@ define <16 x i8> @trunc_and_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = and <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -3626,6 +3712,7 @@ define <4 x i32> @trunc_xor_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = xor <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -3689,6 +3776,7 @@ define <8 x i16> @trunc_xor_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = xor <8 x i64> %a0, %a1
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -3732,6 +3820,7 @@ define <8 x i16> @trunc_xor_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = xor <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -3832,6 +3921,7 @@ define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_xor_v16i64_v16i8:
@@ -3842,6 +3932,7 @@ define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_xor_v16i64_v16i8:
@@ -3852,6 +3943,7 @@ define <16 x i8> @trunc_xor_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwin
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = xor <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -3912,6 +4004,7 @@ define <16 x i8> @trunc_xor_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwin
; AVX512: # BB#0:
; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = xor <16 x i32> %a0, %a1
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -3956,6 +4049,7 @@ define <16 x i8> @trunc_xor_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512F-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_xor_v16i16_v16i8:
@@ -3963,6 +4057,7 @@ define <16 x i8> @trunc_xor_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512BW-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_xor_v16i16_v16i8:
@@ -3970,6 +4065,7 @@ define <16 x i8> @trunc_xor_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwin
; AVX512DQ-NEXT: vpxor %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = xor <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -4008,6 +4104,7 @@ define <4 x i32> @trunc_xor_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = xor <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -4065,6 +4162,7 @@ define <8 x i16> @trunc_xor_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = xor <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -4106,6 +4204,7 @@ define <8 x i16> @trunc_xor_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = xor <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -4192,6 +4291,7 @@ define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_xor_const_v16i64_v16i8:
@@ -4201,6 +4301,7 @@ define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_xor_const_v16i64_v16i8:
@@ -4210,6 +4311,7 @@ define <16 x i8> @trunc_xor_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = xor <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -4265,6 +4367,7 @@ define <16 x i8> @trunc_xor_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = xor <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -4308,6 +4411,7 @@ define <16 x i8> @trunc_xor_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_xor_const_v16i16_v16i8:
@@ -4315,6 +4419,7 @@ define <16 x i8> @trunc_xor_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_xor_const_v16i16_v16i8:
@@ -4322,6 +4427,7 @@ define <16 x i8> @trunc_xor_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = xor <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -4362,6 +4468,7 @@ define <4 x i32> @trunc_or_v4i64_v4i32(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = or <4 x i64> %a0, %a1
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -4425,6 +4532,7 @@ define <8 x i16> @trunc_or_v8i64_v8i16(<8 x i64> %a0, <8 x i64> %a1) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = or <8 x i64> %a0, %a1
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -4468,6 +4576,7 @@ define <8 x i16> @trunc_or_v8i32_v8i16(<8 x i32> %a0, <8 x i32> %a1) nounwind {
; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = or <8 x i32> %a0, %a1
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -4568,6 +4677,7 @@ define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind
; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_v16i64_v16i8:
@@ -4578,6 +4688,7 @@ define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind
; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_v16i64_v16i8:
@@ -4588,6 +4699,7 @@ define <16 x i8> @trunc_or_v16i64_v16i8(<16 x i64> %a0, <16 x i64> %a1) nounwind
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = or <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -4648,6 +4760,7 @@ define <16 x i8> @trunc_or_v16i32_v16i8(<16 x i32> %a0, <16 x i32> %a1) nounwind
; AVX512: # BB#0:
; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = or <16 x i32> %a0, %a1
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -4692,6 +4805,7 @@ define <16 x i8> @trunc_or_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind
; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_v16i16_v16i8:
@@ -4699,6 +4813,7 @@ define <16 x i8> @trunc_or_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind
; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_v16i16_v16i8:
@@ -4706,6 +4821,7 @@ define <16 x i8> @trunc_or_v16i16_v16i8(<16 x i16> %a0, <16 x i16> %a1) nounwind
; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = or <16 x i16> %a0, %a1
%2 = trunc <16 x i16> %1 to <16 x i8>
@@ -4744,6 +4860,7 @@ define <4 x i32> @trunc_or_const_v4i64_v4i32(<4 x i64> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = or <4 x i64> %a0, <i64 0, i64 1, i64 2, i64 3>
%2 = trunc <4 x i64> %1 to <4 x i32>
@@ -4801,6 +4918,7 @@ define <8 x i16> @trunc_or_const_v8i64_v8i16(<8 x i64> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovqw %zmm0, %xmm0
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = or <8 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
%2 = trunc <8 x i64> %1 to <8 x i16>
@@ -4842,6 +4960,7 @@ define <8 x i16> @trunc_or_const_v8i32_v8i16(<8 x i32> %a0) nounwind {
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = or <8 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = trunc <8 x i32> %1 to <8 x i16>
@@ -4928,6 +5047,7 @@ define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_const_v16i64_v16i8:
@@ -4937,6 +5057,7 @@ define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
; AVX512BW-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_const_v16i64_v16i8:
@@ -4946,6 +5067,7 @@ define <16 x i8> @trunc_or_const_v16i64_v16i8(<16 x i64> %a0) nounwind {
; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = or <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
@@ -5001,6 +5123,7 @@ define <16 x i8> @trunc_or_const_v16i32_v16i8(<16 x i32> %a0) nounwind {
; AVX512: # BB#0:
; AVX512-NEXT: vpmovdb %zmm0, %xmm0
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%1 = or <16 x i32> %a0, <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = trunc <16 x i32> %1 to <16 x i8>
@@ -5044,6 +5167,7 @@ define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_or_const_v16i16_v16i8:
@@ -5051,6 +5175,7 @@ define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: trunc_or_const_v16i16_v16i8:
@@ -5058,6 +5183,7 @@ define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%1 = or <16 x i16> %a0, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
%2 = trunc <16 x i16> %1 to <16 x i8>

