diff options
-rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 1140 |
1 files changed, 570 insertions, 570 deletions
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index 12cd5ad3624..bab9cfb253f 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW,AVX512BW-64 +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW-32 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { ; AVX512BW-LABEL: test_mask_packs_epi32_rr_512: @@ -8,10 +8,10 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rr_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rr_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) ret <32 x i16> %1 } @@ -24,12 +24,12 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, < ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rrk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rrk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru @@ -43,11 +43,11 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; AVX512BW-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rrkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rrkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer @@ -60,11 +60,11 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_ ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rm_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rm_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) ret <32 x i16> %1 @@ -78,13 +78,13 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rmk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rmk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackssdw (%eax), %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> @@ -99,12 +99,12 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %pt ; AVX512BW-NEXT: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rmkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rmkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> @@ -118,11 +118,11 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rmb_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rmb_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -138,13 +138,13 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <3 ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rmbk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rmbk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -161,12 +161,12 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i ; AVX512BW-NEXT: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi32_rmbkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi32_rmbkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -184,10 +184,10 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi16_rr_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi16_rr_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) ret <64 x i8> %1 } @@ -200,12 +200,12 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi16_rrk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi16_rrk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) %2 = bitcast i64 %mask to <64 x i1> %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru @@ -219,11 +219,11 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi16_rrkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi16_rrkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) %2 = bitcast i64 %mask to <64 x i1> %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer @@ -236,11 +236,11 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi16_rm_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi16_rm_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) ret <64 x i8> %1 @@ -254,13 +254,13 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi16_rmk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi16_rmk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpacksswb (%eax), %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) %2 = bitcast i64 %mask to <64 x i1> @@ -275,12 +275,12 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packs_epi16_rmkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packs_epi16_rmkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) %2 = bitcast i64 %mask to <64 x i1> @@ -297,10 +297,10 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rr_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rr_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) ret <32 x i16> %1 } @@ -313,12 +313,12 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rrk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rrk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru @@ -332,11 +332,11 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; AVX512BW-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rrkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rrkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer @@ -349,11 +349,11 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rm_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rm_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) ret <32 x i16> %1 @@ -367,13 +367,13 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %pt ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rmk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rmk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackusdw (%eax), %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> @@ -388,12 +388,12 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %p ; AVX512BW-NEXT: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rmkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rmkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %b = load <16 x i32>, <16 x i32>* %ptr_b %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) %2 = bitcast i32 %mask to <32 x i1> @@ -407,11 +407,11 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rmb_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rmb_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -427,13 +427,13 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, < ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rmbk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rmbk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -450,12 +450,12 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, ; AVX512BW-NEXT: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi32_rmbkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi32_rmbkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %q = load i32, i32* %ptr_b %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer @@ -473,10 +473,10 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi16_rr_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi16_rr_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) ret <64 x i8> %1 } @@ -489,12 +489,12 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, < ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi16_rrk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi16_rrk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) %2 = bitcast i64 %mask to <64 x i1> %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru @@ -508,11 +508,11 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, ; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi16_rrkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi16_rrkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) %2 = bitcast i64 %mask to <64 x i1> %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer @@ -525,11 +525,11 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi16_rm_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi16_rm_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) ret <64 x i8> %1 @@ -543,13 +543,13 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi16_rmk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi16_rmk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackuswb (%eax), %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) %2 = bitcast i64 %mask to <64 x i1> @@ -564,12 +564,12 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %pt ; AVX512BW-NEXT: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_packus_epi16_rmkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_packus_epi16_rmkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) %2 = bitcast i64 %mask to <64 x i1> @@ -585,10 +585,10 @@ define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epi16_rr_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epi16_rr_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -601,12 +601,12 @@ define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epi16_rrk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epi16_rrk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -618,11 +618,11 @@ define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epi16_rrkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epi16_rrkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -633,11 +633,11 @@ define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epi16_rm_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epi16_rm_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -651,13 +651,13 @@ define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epi16_rmk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epi16_rmk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -670,12 +670,12 @@ define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epi16_rmkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epi16_rmkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -689,10 +689,10 @@ define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epi16_rr_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epi16_rr_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -705,12 +705,12 @@ define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epi16_rrk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epi16_rrk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -722,11 +722,11 @@ define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epi16_rrkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epi16_rrkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -737,11 +737,11 @@ define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epi16_rm_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epi16_rm_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -755,13 +755,13 @@ define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epi16_rmk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epi16_rmk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -774,12 +774,12 @@ define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epi16_rmkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epi16_rmkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -793,10 +793,10 @@ define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epu16_rr_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epu16_rr_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -809,12 +809,12 @@ define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epu16_rrk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epu16_rrk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -826,11 +826,11 @@ define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epu16_rrkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epu16_rrkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -841,11 +841,11 @@ define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epu16_rm_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epu16_rm_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -859,13 +859,13 @@ define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epu16_rmk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epu16_rmk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpaddusw (%eax), %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -878,12 +878,12 @@ define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_adds_epu16_rmkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_adds_epu16_rmkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -897,10 +897,10 @@ define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epu16_rr_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epu16_rr_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res } @@ -913,12 +913,12 @@ define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <3 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epu16_rrk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epu16_rrk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res } @@ -930,11 +930,11 @@ define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; AVX512BW-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epu16_rrkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epu16_rrkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res } @@ -945,11 +945,11 @@ define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epu16_rm_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epu16_rm_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -963,13 +963,13 @@ define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_ ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epu16_rmk_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epu16_rmk_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsubusw (%eax), %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) ret <32 x i16> %res @@ -982,12 +982,12 @@ define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr ; AVX512BW-NEXT: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_mask_subs_epu16_rmkz_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_mask_subs_epu16_rmkz_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %b = load <32 x i16>, <32 x i16>* %ptr_b %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) ret <32 x i16> %res @@ -1005,14 +1005,14 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3 -; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm3 +; AVX512BW-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) %2 = bitcast i32 %x3 to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1 @@ -1031,14 +1031,14 @@ define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <3 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3 -; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm3 +; AVX512BW-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2) %2 = bitcast i32 %x3 to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer @@ -1059,14 +1059,14 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm3 -; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vmovdqa64 %zmm0, %zmm3 +; AVX512BW-32-NEXT: vpermt2w %zmm2, %zmm1, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2) %2 = bitcast i32 %x3 to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1 @@ -1083,10 +1083,10 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) { ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_pshuf_b_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) ret <64 x i8> %res } @@ -1099,12 +1099,12 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> % ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_mask: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_pshuf_b_512_mask: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpshufb %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) %mask.cast = bitcast i64 %mask to <64 x i1> %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2 @@ -1118,11 +1118,11 @@ define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> ; AVX512BW-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_pshuf_b_512_maskz: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_pshuf_b_512_maskz: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1) %mask.cast = bitcast i64 %mask to <64 x i1> %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer @@ -1140,13 +1140,13 @@ define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmulhu_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1) %2 = bitcast i32 %x3 to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 @@ -1166,13 +1166,13 @@ define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i1 ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmulh_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpmulhw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1) %2 = bitcast i32 %x3 to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 @@ -1192,13 +1192,13 @@ define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1) %2 = bitcast i32 %x3 to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 @@ -1220,15 +1220,15 @@ define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1} -; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} -; AVX512F-32-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmov_wb_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpmovwb %zmm0, %ymm1 {%k1} +; AVX512BW-32-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} +; AVX512BW-32-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX512BW-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512BW-32-NEXT: retl %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) @@ -1247,13 +1247,13 @@ define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, ; AVX512BW-NEXT: vpmovwb %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) -; AVX512F-32-NEXT: vpmovwb %zmm0, (%eax) {%k1} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpmovwb %zmm0, (%eax) +; AVX512BW-32-NEXT: vpmovwb %zmm0, (%eax) {%k1} +; AVX512BW-32-NEXT: retl call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) ret void @@ -1272,15 +1272,15 @@ define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1} -; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} -; AVX512F-32-NEXT: vpmovswb %zmm0, %ymm0 -; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpmovswb %zmm0, %ymm1 {%k1} +; AVX512BW-32-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} +; AVX512BW-32-NEXT: vpmovswb %zmm0, %ymm0 +; AVX512BW-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX512BW-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512BW-32-NEXT: retl %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) @@ -1299,13 +1299,13 @@ define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax) -; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax) {%k1} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpmovswb %zmm0, (%eax) +; AVX512BW-32-NEXT: vpmovswb %zmm0, (%eax) {%k1} +; AVX512BW-32-NEXT: retl call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) ret void @@ -1324,15 +1324,15 @@ define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} -; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} -; AVX512F-32-NEXT: vpmovuswb %zmm0, %ymm0 -; AVX512F-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 -; AVX512F-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpmovuswb %zmm0, %ymm1 {%k1} +; AVX512BW-32-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} +; AVX512BW-32-NEXT: vpmovuswb %zmm0, %ymm0 +; AVX512BW-32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; AVX512BW-32-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512BW-32-NEXT: retl %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) @@ -1351,13 +1351,13 @@ define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1 ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax) -; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax) {%k1} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512BW-32-NEXT: vpmovuswb %zmm0, (%eax) +; AVX512BW-32-NEXT: vpmovuswb %zmm0, (%eax) {%k1} +; AVX512BW-32-NEXT: retl call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) ret void @@ -1374,13 +1374,13 @@ define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x ; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1) %2 = bitcast i32 %x3 to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 @@ -1400,13 +1400,13 @@ define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i ; AVX512BW-NEXT: vpaddd %zmm3, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpaddd %zmm3, %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpaddd %zmm3, %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1) %2 = bitcast i16 %x3 to <16 x i1> %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2 @@ -1428,15 +1428,15 @@ define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> ; AVX512BW-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm2, %zmm2 +; AVX512BW-32-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4) %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1) @@ -1455,12 +1455,12 @@ define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> ; AVX512BW-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_psadb_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 -; AVX512F-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 -; AVX512F-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_psadb_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 +; AVX512BW-32-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 +; AVX512BW-32-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) %res2 = add <8 x i64> %res, %res1 @@ -1480,15 +1480,15 @@ define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrlv32hi: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_psrlv32hi: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) @@ -1510,15 +1510,15 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_psrav32_hi: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsravw %zmm1, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) @@ -1534,11 +1534,11 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 ; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51] -; AVX512F-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51] +; AVX512BW-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>, <32 x i16> <i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49, i16 1, i16 10, i16 35, i16 52, i16 69, i16 9, i16 16, i16 49>, <32 x i16> zeroinitializer, i32 -1) @@ -1558,15 +1558,15 @@ define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_psllv32hi: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_psllv32hi: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) @@ -1588,15 +1588,15 @@ define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x ; AVX512BW-NEXT: vpaddw %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm3 -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} -; AVX512F-32-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 -; AVX512F-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_int_x86_avx512_mask_permvar_hi_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpermw %zmm0, %zmm1, %zmm3 +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpermw %zmm0, %zmm1, %zmm2 {%k1} +; AVX512BW-32-NEXT: vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; AVX512BW-32-NEXT: vpaddw %zmm3, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1) %2 = bitcast i32 %x3 to <32 x i1> %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2 @@ -1615,10 +1615,10 @@ define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) { ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_psll_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_psll_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] ret <32 x i16> %res } @@ -1630,12 +1630,12 @@ define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_mask_psll_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_mask_psll_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru @@ -1648,11 +1648,11 @@ define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_maskz_psll_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_maskz_psll_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer @@ -1667,10 +1667,10 @@ define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) { ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_pslli_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsllw $7, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_pslli_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsllw $7, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] ret <32 x i16> %res } @@ -1682,12 +1682,12 @@ define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> % ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_mask_pslli_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_mask_pslli_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsllw $7, %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru @@ -1700,11 +1700,11 @@ define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) ; AVX512BW-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_maskz_pslli_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_maskz_pslli_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsllw $7, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer @@ -1719,10 +1719,10 @@ define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) { ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_psra_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_psra_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] ret <32 x i16> %res } @@ -1734,12 +1734,12 @@ define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_mask_psra_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_mask_psra_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru @@ -1752,11 +1752,11 @@ define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a ; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_maskz_psra_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_maskz_psra_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer @@ -1771,10 +1771,10 @@ define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) { ; AVX512BW-NEXT: vpsraw $7, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_psrai_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsraw $7, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_psrai_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsraw $7, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] ret <32 x i16> %res } @@ -1786,12 +1786,12 @@ define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> % ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_mask_psrai_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_mask_psrai_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsraw $7, %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru @@ -1804,11 +1804,11 @@ define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> ; AVX512BW-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_maskz_psrai_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_maskz_psrai_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsraw $7, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer @@ -1823,10 +1823,10 @@ define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) { ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_psrl_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_psrl_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] ret <32 x i16> %res } @@ -1838,12 +1838,12 @@ define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1 ; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_mask_psrl_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm2, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_mask_psrl_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm2, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru @@ -1856,11 +1856,11 @@ define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_maskz_psrl_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_maskz_psrl_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer @@ -1875,10 +1875,10 @@ define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) { ; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_psrli_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: vpsrlw $7, %zmm0, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_psrli_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: vpsrlw $7, %zmm0, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] ret <32 x i16> %res } @@ -1890,12 +1890,12 @@ define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> % ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_mask_psrli_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm0 -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_mask_psrli_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsrlw $7, %zmm0, %zmm1 {%k1} +; AVX512BW-32-NEXT: vmovdqa64 %zmm1, %zmm0 +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru @@ -1908,11 +1908,11 @@ define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) ; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} ; AVX512BW-NEXT: retq ; -; AVX512F-32-LABEL: test_x86_avx512_maskz_psrli_w_512: -; AVX512F-32: # %bb.0: -; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} -; AVX512F-32-NEXT: retl +; AVX512BW-32-LABEL: test_x86_avx512_maskz_psrli_w_512: +; AVX512BW-32: # %bb.0: +; AVX512BW-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512BW-32-NEXT: vpsrlw $7, %zmm0, %zmm0 {%k1} {z} +; AVX512BW-32-NEXT: retl %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1] %mask.cast = bitcast i32 %mask to <32 x i1> %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer |