diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll | 152 |
1 files changed, 83 insertions, 69 deletions
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll index 16497701c90..0e687739eb4 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl --show-mc-encoding| FileCheck %s declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16) @@ -9,8 +9,8 @@ define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> ; CHECK-NEXT: vpbroadcastb %edi, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf] ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpbroadcastb %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; CHECK-NEXT: vpbroadcastb %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %edi, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7] +; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] ; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) @@ -30,8 +30,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16 ; CHECK-NEXT: vpbroadcastw %edi, %xmm1 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf] ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpbroadcastw %edi, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] -; CHECK-NEXT: vpbroadcastw %edi, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %edi, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7] +; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] ; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) @@ -51,8 +51,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16 ; CHECK-NEXT: vpbroadcastb %edi, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf] ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpbroadcastb %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] -; CHECK-NEXT: vpbroadcastb %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf] +; CHECK-NEXT: vpbroadcastb %edi, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7] +; CHECK-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] ; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) @@ -73,8 +73,8 @@ declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i ; CHECK-NEXT: vpbroadcastw %edi, %ymm1 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf] ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpbroadcastw %edi, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] -; CHECK-NEXT: vpbroadcastw %edi, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf] +; CHECK-NEXT: vpbroadcastw %edi, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7] +; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1) @@ -93,9 +93,9 @@ define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xd0] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8] -; CHECK-NEXT: vpaddb %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc9] ; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0] -; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] +; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) @@ -113,9 +113,9 @@ define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xd0] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8] -; CHECK-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc9] ; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0] -; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) @@ -133,9 +133,9 @@ define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xd0] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8] -; CHECK-NEXT: vpaddw %ymm1, %ymm2, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc9] ; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0] -; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1) %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) @@ -153,9 +153,9 @@ define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xd0] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8] -; CHECK-NEXT: vpaddw %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc9] ; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0] -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) @@ -173,9 +173,9 @@ define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x78,0xd0] ; CHECK-NEXT: kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf] ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8] -; CHECK-NEXT: vpaddb %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc9] ; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0] -; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] +; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] +; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1) %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) @@ -193,9 +193,9 @@ define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x79,0xd0] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8] -; CHECK-NEXT: vpaddw %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc9] ; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0] -; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] +; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1) %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) @@ -227,6 +227,7 @@ define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8 ; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca] ; CHECK-NEXT: vmovdqu8 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07] ; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2) call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1) @@ -255,6 +256,7 @@ define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i1 ; CHECK-NEXT: kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca] ; CHECK-NEXT: vmovdqu16 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07] ; CHECK-NEXT: vmovdqu %ymm0, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2) call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1) @@ -345,8 +347,8 @@ define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> % ; CHECK-NEXT: ## xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] ; CHECK-NEXT: vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1] -; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3] +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4) %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4) @@ -368,8 +370,8 @@ define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> % ; CHECK-NEXT: ## ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] ; CHECK-NEXT: vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17] -; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3] +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4) %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4) @@ -391,8 +393,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, < ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4] ; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) @@ -414,8 +416,8 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] ; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) @@ -437,8 +439,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, < ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7] ; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) @@ -460,8 +462,8 @@ define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, ; CHECK-NEXT: ## ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] ; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03] ; CHECK-NEXT: ## ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) @@ -476,6 +478,7 @@ define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) ret i32 %res @@ -487,6 +490,7 @@ define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) ret i32 %res @@ -500,6 +504,7 @@ define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) ret i16 %res @@ -512,6 +517,7 @@ define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { ; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) ret i16 %res @@ -524,6 +530,7 @@ define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) { ; CHECK: ## BB#0: ; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1) ret i32 %res @@ -535,6 +542,7 @@ define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask) ret i32 %res @@ -548,6 +556,7 @@ define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1) ret i16 %res @@ -560,6 +569,7 @@ define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { ; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc1] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask) ret i16 %res @@ -1660,9 +1670,9 @@ define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1] -; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xd3] ; CHECK-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1] -; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm3, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) @@ -1680,8 +1690,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> ; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xd9] ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1] -; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xd3] ; CHECK-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1] +; CHECK-NEXT: vpaddw %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) @@ -1701,8 +1711,8 @@ define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1] ; CHECK-NEXT: vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -1721,8 +1731,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1] ; CHECK-NEXT: vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -1741,8 +1751,8 @@ define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1] ; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1] -; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm3, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3) @@ -1761,8 +1771,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1] ; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1] -; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm3, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3) @@ -1780,9 +1790,9 @@ define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 ; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x03] ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] -; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca] ; CHECK-NEXT: vpsrlw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x03] -; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1) @@ -1800,8 +1810,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, ; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm2 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x03] ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] -; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xca] ; CHECK-NEXT: vpsrlw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x03] +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) @@ -1821,8 +1831,8 @@ define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] ; CHECK-NEXT: vpsraw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xe0,0x03] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) @@ -1841,8 +1851,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] ; CHECK-NEXT: vpsraw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xe0,0x03] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) @@ -1861,8 +1871,8 @@ define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] ; CHECK-NEXT: vpsllw $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x71,0xf0,0x03] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3) @@ -1881,8 +1891,8 @@ define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, ; CHECK-NEXT: kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce] ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] ; CHECK-NEXT: vpsllw $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xf0,0x03] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3) @@ -1936,8 +1946,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> ; CHECK-NEXT: ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0] ; CHECK-NEXT: ## xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) @@ -1959,8 +1969,8 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i1 ; CHECK-NEXT: ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0] ; CHECK-NEXT: ## ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) @@ -1980,8 +1990,8 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8] ; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0] -; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: vpaddw %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] +; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2) @@ -2000,8 +2010,8 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i1 ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8] ; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0] -; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: vpaddw %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] +; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2) @@ -2020,8 +2030,8 @@ define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8] ; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0] -; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: vpaddq %xmm2, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2) @@ -2040,8 +2050,8 @@ define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8] ; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0] -; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: vpaddq %ymm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2) @@ -2834,23 +2844,23 @@ define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) { ; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0] ; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] ; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] +; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] +; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] ; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] -; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02] +; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] +; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02] ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] -; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] -; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] +; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] +; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01] +; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] ; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] -; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02] -; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03] +; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] +; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) @@ -2932,23 +2942,23 @@ define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) ; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] ; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] ; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] +; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] +; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] ; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] -; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] +; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] +; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] -; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] -; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] +; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] +; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01] +; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] ; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] -; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02] -; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03] +; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] +; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) @@ -2997,6 +3007,7 @@ define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -3043,6 +3054,7 @@ define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) ; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -3090,6 +3102,7 @@ define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) { ; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 @@ -3136,6 +3149,7 @@ define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask ; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1] ; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] ; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] +; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask) %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0 |