Diffstat (limited to 'llvm/test/CodeGen/X86/masked_memop.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/masked_memop.ll | 8037
1 file changed, 8036 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index 7fa8aba0972..c31b8381aeb 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx2 < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
 ; RUN: llc -mtriple=x86_64-apple-darwin -mattr=avx512f < %s | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F
@@ -196,6 +196,7 @@ define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double>
 ;
 ; AVX512F-LABEL: test5:
 ; AVX512F: ## BB#0:
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
 ; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
 ; AVX512F-NEXT: vmovupd (%rdi), %zmm1 {%k1}
@@ -446,6 +447,8 @@ define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float>
 ;
 ; AVX512F-LABEL: test11a:
 ; AVX512F: ## BB#0:
+; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
 ; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
@@ -491,6 +494,7 @@ define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
 ;
 ; AVX512F-LABEL: test11b:
 ; AVX512F: ## BB#0:
+; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
 ; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
 ; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
@@ -540,6 +544,7 @@ define <8 x float> @test11c(<8 x i1> %mask, <8 x float>* %addr) {
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1} {z}
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test11c:
@@ -581,6 +586,7 @@ define <8 x i32> @test11d(<8 x i1> %mask, <8 x i32>* %addr) {
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
 ; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test11d:
@@ -615,6 +621,8 @@ define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
 ;
 ; AVX512F-LABEL: test12:
 ; AVX512F: ## BB#0:
+; AVX512F-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2
 ; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
@@ -1007,9 +1015,11 @@ define <8 x float> @mload_constmask_v8f32(<8 x float>* %addr, <8 x float> %dst)
 ;
 ; AVX512F-LABEL: mload_constmask_v8f32:
 ; AVX512F: ## BB#0:
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; AVX512F-NEXT: movw $7, %ax
 ; AVX512F-NEXT: kmovw %eax, %k1
 ; AVX512F-NEXT: vmovups (%rdi), %zmm0 {%k1}
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: mload_constmask_v8f32:
@@ -1062,9 +1072,11 @@ define <8 x i32> @mload_constmask_v8i32(<8 x i32>* %addr, <8 x i32> %dst) {
 ;
 ; AVX512F-LABEL: mload_constmask_v8i32:
 ; AVX512F: ## BB#0:
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
 ; AVX512F-NEXT: movw $135, %ax
 ; AVX512F-NEXT: kmovw %eax, %k1
 ; AVX512F-NEXT:
vmovdqu32 (%rdi), %zmm0 {%k1} +; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> ; AVX512F-NEXT: retq ; ; SKX-LABEL: mload_constmask_v8i32: @@ -2221,6 +2233,251 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32 declare <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32, <32 x i1> %mask, <32 x double> %src0) define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) { +; AVX-LABEL: test_mask_load_16xi8: +; AVX: ## BB#0: +; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: ## implicit-def: %XMM1 +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_2 +; AVX-NEXT: ## BB#1: ## %cond.load +; AVX-NEXT: movzbl (%rdi), %eax +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: LBB50_2: ## %else +; AVX-NEXT: vpextrb $1, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_4 +; AVX-NEXT: ## BB#3: ## %cond.load1 +; AVX-NEXT: vpinsrb $1, 1(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_4: ## %else2 +; AVX-NEXT: vpextrb $2, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_6 +; AVX-NEXT: ## BB#5: ## %cond.load4 +; AVX-NEXT: vpinsrb $2, 2(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_6: ## %else5 +; AVX-NEXT: vpextrb $3, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_8 +; AVX-NEXT: ## BB#7: ## %cond.load7 +; AVX-NEXT: vpinsrb $3, 3(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_8: ## %else8 +; AVX-NEXT: vpextrb $4, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_10 +; AVX-NEXT: ## BB#9: ## %cond.load10 +; AVX-NEXT: vpinsrb $4, 4(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_10: ## %else11 +; AVX-NEXT: vpextrb $5, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_12 +; AVX-NEXT: ## BB#11: ## %cond.load13 +; AVX-NEXT: vpinsrb $5, 5(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_12: ## %else14 +; AVX-NEXT: vpextrb $6, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_14 +; AVX-NEXT: ## BB#13: ## %cond.load16 +; AVX-NEXT: vpinsrb $6, 6(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_14: ## %else17 +; AVX-NEXT: vpextrb $7, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_16 +; AVX-NEXT: ## BB#15: ## %cond.load19 +; AVX-NEXT: vpinsrb $7, 7(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_16: ## %else20 +; AVX-NEXT: vpextrb $8, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_18 +; AVX-NEXT: ## BB#17: ## %cond.load22 +; AVX-NEXT: vpinsrb $8, 8(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_18: ## %else23 +; AVX-NEXT: vpextrb $9, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_20 +; AVX-NEXT: ## BB#19: ## %cond.load25 +; AVX-NEXT: vpinsrb $9, 9(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_20: ## %else26 +; AVX-NEXT: vpextrb $10, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_22 +; AVX-NEXT: ## BB#21: ## %cond.load28 +; AVX-NEXT: vpinsrb $10, 10(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_22: ## %else29 +; AVX-NEXT: vpextrb $11, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_24 +; AVX-NEXT: ## BB#23: ## %cond.load31 +; AVX-NEXT: vpinsrb $11, 11(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_24: ## %else32 +; AVX-NEXT: vpextrb $12, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_26 +; AVX-NEXT: ## BB#25: ## %cond.load34 +; AVX-NEXT: vpinsrb $12, 12(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_26: ## %else35 +; AVX-NEXT: vpextrb $13, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_28 +; AVX-NEXT: ## BB#27: ## %cond.load37 +; AVX-NEXT: vpinsrb $13, 13(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_28: ## %else38 +; AVX-NEXT: vpextrb $14, %xmm0, %eax +; AVX-NEXT: testb $1, %al 
+; AVX-NEXT: je LBB50_30 +; AVX-NEXT: ## BB#29: ## %cond.load40 +; AVX-NEXT: vpinsrb $14, 14(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_30: ## %else41 +; AVX-NEXT: vpextrb $15, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB50_32 +; AVX-NEXT: ## BB#31: ## %cond.load43 +; AVX-NEXT: vpinsrb $15, 15(%rdi), %xmm1, %xmm1 +; AVX-NEXT: LBB50_32: ## %else44 +; AVX-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpblendvb %xmm0, %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: test_mask_load_16xi8: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512F-NEXT: kshiftlw $15, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: ## implicit-def: %XMM0 +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_2 +; AVX512F-NEXT: ## BB#1: ## %cond.load +; AVX512F-NEXT: movzbl (%rdi), %eax +; AVX512F-NEXT: vmovd %eax, %xmm0 +; AVX512F-NEXT: LBB50_2: ## %else +; AVX512F-NEXT: kshiftlw $14, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_4 +; AVX512F-NEXT: ## BB#3: ## %cond.load1 +; AVX512F-NEXT: vpinsrb $1, 1(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_4: ## %else2 +; AVX512F-NEXT: kshiftlw $13, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_6 +; AVX512F-NEXT: ## BB#5: ## %cond.load4 +; AVX512F-NEXT: vpinsrb $2, 2(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_6: ## %else5 +; AVX512F-NEXT: kshiftlw $12, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_8 +; AVX512F-NEXT: ## BB#7: ## %cond.load7 +; AVX512F-NEXT: vpinsrb $3, 3(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_8: ## %else8 +; AVX512F-NEXT: kshiftlw $11, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_10 +; AVX512F-NEXT: ## BB#9: ## %cond.load10 +; AVX512F-NEXT: vpinsrb $4, 4(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_10: ## %else11 +; AVX512F-NEXT: kshiftlw $10, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_12 +; AVX512F-NEXT: ## BB#11: ## %cond.load13 +; AVX512F-NEXT: vpinsrb $5, 5(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_12: ## %else14 +; AVX512F-NEXT: kshiftlw $9, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_14 +; AVX512F-NEXT: ## BB#13: ## %cond.load16 +; AVX512F-NEXT: vpinsrb $6, 6(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_14: ## %else17 +; AVX512F-NEXT: kshiftlw $8, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_16 +; AVX512F-NEXT: ## BB#15: ## %cond.load19 +; AVX512F-NEXT: vpinsrb $7, 7(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_16: ## %else20 +; AVX512F-NEXT: kshiftlw $7, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_18 +; AVX512F-NEXT: ## BB#17: ## %cond.load22 +; AVX512F-NEXT: vpinsrb $8, 8(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_18: ## %else23 +; AVX512F-NEXT: kshiftlw $6, %k1, %k0 +; AVX512F-NEXT: kshiftrw 
$15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_20 +; AVX512F-NEXT: ## BB#19: ## %cond.load25 +; AVX512F-NEXT: vpinsrb $9, 9(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_20: ## %else26 +; AVX512F-NEXT: kshiftlw $5, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_22 +; AVX512F-NEXT: ## BB#21: ## %cond.load28 +; AVX512F-NEXT: vpinsrb $10, 10(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_22: ## %else29 +; AVX512F-NEXT: kshiftlw $4, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_24 +; AVX512F-NEXT: ## BB#23: ## %cond.load31 +; AVX512F-NEXT: vpinsrb $11, 11(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_24: ## %else32 +; AVX512F-NEXT: kshiftlw $3, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_26 +; AVX512F-NEXT: ## BB#25: ## %cond.load34 +; AVX512F-NEXT: vpinsrb $12, 12(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_26: ## %else35 +; AVX512F-NEXT: kshiftlw $2, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_28 +; AVX512F-NEXT: ## BB#27: ## %cond.load37 +; AVX512F-NEXT: vpinsrb $13, 13(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_28: ## %else38 +; AVX512F-NEXT: kshiftlw $1, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_30 +; AVX512F-NEXT: ## BB#29: ## %cond.load40 +; AVX512F-NEXT: vpinsrb $14, 14(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_30: ## %else41 +; AVX512F-NEXT: kshiftlw $0, %k1, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB50_32 +; AVX512F-NEXT: ## BB#31: ## %cond.load43 +; AVX512F-NEXT: vpinsrb $15, 15(%rdi), %xmm0, %xmm0 +; AVX512F-NEXT: LBB50_32: ## %else44 +; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512F-NEXT: vpblendvb %xmm1, %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; ; SKX-LABEL: test_mask_load_16xi8: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -2233,6 +2490,764 @@ define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>) define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) { +; AVX1-LABEL: test_mask_load_32xi8: +; AVX1: ## BB#0: +; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: ## implicit-def: %YMM1 +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_2 +; AVX1-NEXT: ## BB#1: ## %cond.load +; AVX1-NEXT: movzbl (%rdi), %eax +; AVX1-NEXT: vmovd %eax, %xmm1 +; AVX1-NEXT: LBB51_2: ## %else +; AVX1-NEXT: vpextrb $1, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_4 +; AVX1-NEXT: ## BB#3: ## %cond.load1 +; AVX1-NEXT: vpinsrb $1, 1(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_4: ## %else2 +; AVX1-NEXT: vpextrb $2, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_6 +; AVX1-NEXT: ## BB#5: ## %cond.load4 +; AVX1-NEXT: vpinsrb $2, 2(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_6: ## %else5 +; AVX1-NEXT: vpextrb $3, %xmm0, %eax +; AVX1-NEXT: 
testb $1, %al +; AVX1-NEXT: je LBB51_8 +; AVX1-NEXT: ## BB#7: ## %cond.load7 +; AVX1-NEXT: vpinsrb $3, 3(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_8: ## %else8 +; AVX1-NEXT: vpextrb $4, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_10 +; AVX1-NEXT: ## BB#9: ## %cond.load10 +; AVX1-NEXT: vpinsrb $4, 4(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_10: ## %else11 +; AVX1-NEXT: vpextrb $5, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_12 +; AVX1-NEXT: ## BB#11: ## %cond.load13 +; AVX1-NEXT: vpinsrb $5, 5(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_12: ## %else14 +; AVX1-NEXT: vpextrb $6, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_14 +; AVX1-NEXT: ## BB#13: ## %cond.load16 +; AVX1-NEXT: vpinsrb $6, 6(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_14: ## %else17 +; AVX1-NEXT: vpextrb $7, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_16 +; AVX1-NEXT: ## BB#15: ## %cond.load19 +; AVX1-NEXT: vpinsrb $7, 7(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_16: ## %else20 +; AVX1-NEXT: vpextrb $8, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_18 +; AVX1-NEXT: ## BB#17: ## %cond.load22 +; AVX1-NEXT: vpinsrb $8, 8(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_18: ## %else23 +; AVX1-NEXT: vpextrb $9, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_20 +; AVX1-NEXT: ## BB#19: ## %cond.load25 +; AVX1-NEXT: vpinsrb $9, 9(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_20: ## %else26 +; AVX1-NEXT: vpextrb $10, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_22 +; AVX1-NEXT: ## BB#21: ## %cond.load28 +; AVX1-NEXT: vpinsrb $10, 10(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_22: ## %else29 +; AVX1-NEXT: vpextrb $11, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_24 +; AVX1-NEXT: ## BB#23: ## %cond.load31 +; AVX1-NEXT: vpinsrb $11, 11(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_24: ## %else32 +; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_26 +; AVX1-NEXT: ## BB#25: ## %cond.load34 +; AVX1-NEXT: vpinsrb $12, 12(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_26: ## %else35 +; AVX1-NEXT: vpextrb $13, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_28 +; AVX1-NEXT: ## BB#27: ## %cond.load37 +; AVX1-NEXT: vpinsrb $13, 13(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_28: ## %else38 +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_30 +; AVX1-NEXT: ## BB#29: ## %cond.load40 +; AVX1-NEXT: vpinsrb $14, 14(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_30: ## %else41 +; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_32 +; AVX1-NEXT: ## BB#31: ## %cond.load43 +; AVX1-NEXT: vpinsrb $15, 15(%rdi), %xmm1, %xmm2 +; AVX1-NEXT: vblendps 
{{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX1-NEXT: LBB51_32: ## %else44 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpextrb $0, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_34 +; AVX1-NEXT: ## BB#33: ## %cond.load46 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $0, 16(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_34: ## %else47 +; AVX1-NEXT: vpextrb $1, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_36 +; AVX1-NEXT: ## BB#35: ## %cond.load49 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $1, 17(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_36: ## %else50 +; AVX1-NEXT: vpextrb $2, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_38 +; AVX1-NEXT: ## BB#37: ## %cond.load52 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $2, 18(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_38: ## %else53 +; AVX1-NEXT: vpextrb $3, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_40 +; AVX1-NEXT: ## BB#39: ## %cond.load55 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $3, 19(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_40: ## %else56 +; AVX1-NEXT: vpextrb $4, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_42 +; AVX1-NEXT: ## BB#41: ## %cond.load58 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $4, 20(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_42: ## %else59 +; AVX1-NEXT: vpextrb $5, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_44 +; AVX1-NEXT: ## BB#43: ## %cond.load61 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $5, 21(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_44: ## %else62 +; AVX1-NEXT: vpextrb $6, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_46 +; AVX1-NEXT: ## BB#45: ## %cond.load64 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $6, 22(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_46: ## %else65 +; AVX1-NEXT: vpextrb $7, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_48 +; AVX1-NEXT: ## BB#47: ## %cond.load67 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $7, 23(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_48: ## %else68 +; AVX1-NEXT: vpextrb $8, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_50 +; AVX1-NEXT: ## BB#49: ## %cond.load70 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $8, 24(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_50: ## %else71 +; AVX1-NEXT: vpextrb $9, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_52 +; AVX1-NEXT: ## BB#51: ## %cond.load73 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $9, 25(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_52: ## %else74 +; AVX1-NEXT: vpextrb $10, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_54 +; AVX1-NEXT: ## BB#53: ## %cond.load76 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $10, 26(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_54: ## %else77 +; AVX1-NEXT: 
vpextrb $11, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_56 +; AVX1-NEXT: ## BB#55: ## %cond.load79 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $11, 27(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_56: ## %else80 +; AVX1-NEXT: vpextrb $12, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_58 +; AVX1-NEXT: ## BB#57: ## %cond.load82 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $12, 28(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_58: ## %else83 +; AVX1-NEXT: vpextrb $13, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_60 +; AVX1-NEXT: ## BB#59: ## %cond.load85 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $13, 29(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_60: ## %else86 +; AVX1-NEXT: vpextrb $14, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_62 +; AVX1-NEXT: ## BB#61: ## %cond.load88 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $14, 30(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_62: ## %else89 +; AVX1-NEXT: vpextrb $15, %xmm2, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB51_64 +; AVX1-NEXT: ## BB#63: ## %cond.load91 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpinsrb $15, 31(%rdi), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 +; AVX1-NEXT: LBB51_64: ## %else92 +; AVX1-NEXT: vpsllw $7, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] +; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpcmpgtb %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpgtb %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_mask_load_32xi8: +; AVX2: ## BB#0: +; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: ## implicit-def: %YMM1 +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_2 +; AVX2-NEXT: ## BB#1: ## %cond.load +; AVX2-NEXT: movzbl (%rdi), %eax +; AVX2-NEXT: vmovd %eax, %xmm1 +; AVX2-NEXT: LBB51_2: ## %else +; AVX2-NEXT: vpextrb $1, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_4 +; AVX2-NEXT: ## BB#3: ## %cond.load1 +; AVX2-NEXT: vpinsrb $1, 1(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_4: ## %else2 +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_6 +; AVX2-NEXT: ## BB#5: ## %cond.load4 +; AVX2-NEXT: vpinsrb $2, 2(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_6: ## %else5 +; AVX2-NEXT: vpextrb $3, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_8 +; AVX2-NEXT: ## BB#7: ## %cond.load7 +; AVX2-NEXT: vpinsrb $3, 3(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_8: ## %else8 +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_10 +; AVX2-NEXT: ## BB#9: ## %cond.load10 +; AVX2-NEXT: vpinsrb $4, 4(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_10: ## %else11 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: 
je LBB51_12 +; AVX2-NEXT: ## BB#11: ## %cond.load13 +; AVX2-NEXT: vpinsrb $5, 5(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_12: ## %else14 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_14 +; AVX2-NEXT: ## BB#13: ## %cond.load16 +; AVX2-NEXT: vpinsrb $6, 6(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_14: ## %else17 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_16 +; AVX2-NEXT: ## BB#15: ## %cond.load19 +; AVX2-NEXT: vpinsrb $7, 7(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_16: ## %else20 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_18 +; AVX2-NEXT: ## BB#17: ## %cond.load22 +; AVX2-NEXT: vpinsrb $8, 8(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_18: ## %else23 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_20 +; AVX2-NEXT: ## BB#19: ## %cond.load25 +; AVX2-NEXT: vpinsrb $9, 9(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_20: ## %else26 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_22 +; AVX2-NEXT: ## BB#21: ## %cond.load28 +; AVX2-NEXT: vpinsrb $10, 10(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_22: ## %else29 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_24 +; AVX2-NEXT: ## BB#23: ## %cond.load31 +; AVX2-NEXT: vpinsrb $11, 11(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_24: ## %else32 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_26 +; AVX2-NEXT: ## BB#25: ## %cond.load34 +; AVX2-NEXT: vpinsrb $12, 12(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_26: ## %else35 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_28 +; AVX2-NEXT: ## BB#27: ## %cond.load37 +; AVX2-NEXT: vpinsrb $13, 13(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_28: ## %else38 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_30 +; AVX2-NEXT: ## BB#29: ## %cond.load40 +; AVX2-NEXT: vpinsrb $14, 14(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_30: ## %else41 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_32 +; AVX2-NEXT: ## BB#31: ## %cond.load43 +; AVX2-NEXT: vpinsrb $15, 15(%rdi), %xmm1, %xmm2 +; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX2-NEXT: LBB51_32: ## %else44 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vpextrb $0, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_34 +; AVX2-NEXT: ## BB#33: ## %cond.load46 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $0, 16(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_34: ## %else47 +; AVX2-NEXT: vpextrb $1, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_36 +; AVX2-NEXT: ## BB#35: ## %cond.load49 +; AVX2-NEXT: vextracti128 
$1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $1, 17(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_36: ## %else50 +; AVX2-NEXT: vpextrb $2, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_38 +; AVX2-NEXT: ## BB#37: ## %cond.load52 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $2, 18(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_38: ## %else53 +; AVX2-NEXT: vpextrb $3, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_40 +; AVX2-NEXT: ## BB#39: ## %cond.load55 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $3, 19(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_40: ## %else56 +; AVX2-NEXT: vpextrb $4, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_42 +; AVX2-NEXT: ## BB#41: ## %cond.load58 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $4, 20(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_42: ## %else59 +; AVX2-NEXT: vpextrb $5, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_44 +; AVX2-NEXT: ## BB#43: ## %cond.load61 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $5, 21(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_44: ## %else62 +; AVX2-NEXT: vpextrb $6, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_46 +; AVX2-NEXT: ## BB#45: ## %cond.load64 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $6, 22(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_46: ## %else65 +; AVX2-NEXT: vpextrb $7, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_48 +; AVX2-NEXT: ## BB#47: ## %cond.load67 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $7, 23(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_48: ## %else68 +; AVX2-NEXT: vpextrb $8, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_50 +; AVX2-NEXT: ## BB#49: ## %cond.load70 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $8, 24(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_50: ## %else71 +; AVX2-NEXT: vpextrb $9, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_52 +; AVX2-NEXT: ## BB#51: ## %cond.load73 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $9, 25(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_52: ## %else74 +; AVX2-NEXT: vpextrb $10, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_54 +; AVX2-NEXT: ## BB#53: ## %cond.load76 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $10, 26(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_54: ## %else77 +; AVX2-NEXT: vpextrb $11, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_56 +; AVX2-NEXT: ## BB#55: ## %cond.load79 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $11, 27(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_56: ## %else80 +; AVX2-NEXT: vpextrb $12, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_58 +; AVX2-NEXT: ## BB#57: ## %cond.load82 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $12, 28(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: 
LBB51_58: ## %else83 +; AVX2-NEXT: vpextrb $13, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_60 +; AVX2-NEXT: ## BB#59: ## %cond.load85 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $13, 29(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_60: ## %else86 +; AVX2-NEXT: vpextrb $14, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_62 +; AVX2-NEXT: ## BB#61: ## %cond.load88 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpinsrb $14, 30(%rdi), %xmm3, %xmm3 +; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_62: ## %else89 +; AVX2-NEXT: vpextrb $15, %xmm2, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB51_64 +; AVX2-NEXT: ## BB#63: ## %cond.load91 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpinsrb $15, 31(%rdi), %xmm2, %xmm2 +; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; AVX2-NEXT: LBB51_64: ## %else92 +; AVX2-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2 +; AVX2-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: test_mask_load_32xi8: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpextrb $0, %xmm0, %eax +; AVX512F-NEXT: ## implicit-def: %YMM1 +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_2 +; AVX512F-NEXT: ## BB#1: ## %cond.load +; AVX512F-NEXT: movzbl (%rdi), %eax +; AVX512F-NEXT: vmovd %eax, %xmm1 +; AVX512F-NEXT: LBB51_2: ## %else +; AVX512F-NEXT: vpextrb $1, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_4 +; AVX512F-NEXT: ## BB#3: ## %cond.load1 +; AVX512F-NEXT: vpinsrb $1, 1(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_4: ## %else2 +; AVX512F-NEXT: vpextrb $2, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_6 +; AVX512F-NEXT: ## BB#5: ## %cond.load4 +; AVX512F-NEXT: vpinsrb $2, 2(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_6: ## %else5 +; AVX512F-NEXT: vpextrb $3, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_8 +; AVX512F-NEXT: ## BB#7: ## %cond.load7 +; AVX512F-NEXT: vpinsrb $3, 3(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_8: ## %else8 +; AVX512F-NEXT: vpextrb $4, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_10 +; AVX512F-NEXT: ## BB#9: ## %cond.load10 +; AVX512F-NEXT: vpinsrb $4, 4(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_10: ## %else11 +; AVX512F-NEXT: vpextrb $5, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_12 +; AVX512F-NEXT: ## BB#11: ## %cond.load13 +; AVX512F-NEXT: vpinsrb $5, 5(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_12: ## %else14 +; AVX512F-NEXT: vpextrb $6, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_14 +; AVX512F-NEXT: ## BB#13: ## %cond.load16 +; AVX512F-NEXT: vpinsrb $6, 6(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_14: ## %else17 +; AVX512F-NEXT: vpextrb $7, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_16 +; AVX512F-NEXT: ## BB#15: ## %cond.load19 +; AVX512F-NEXT: vpinsrb $7, 7(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: 
vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_16: ## %else20 +; AVX512F-NEXT: vpextrb $8, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_18 +; AVX512F-NEXT: ## BB#17: ## %cond.load22 +; AVX512F-NEXT: vpinsrb $8, 8(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_18: ## %else23 +; AVX512F-NEXT: vpextrb $9, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_20 +; AVX512F-NEXT: ## BB#19: ## %cond.load25 +; AVX512F-NEXT: vpinsrb $9, 9(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_20: ## %else26 +; AVX512F-NEXT: vpextrb $10, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_22 +; AVX512F-NEXT: ## BB#21: ## %cond.load28 +; AVX512F-NEXT: vpinsrb $10, 10(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_22: ## %else29 +; AVX512F-NEXT: vpextrb $11, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_24 +; AVX512F-NEXT: ## BB#23: ## %cond.load31 +; AVX512F-NEXT: vpinsrb $11, 11(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_24: ## %else32 +; AVX512F-NEXT: vpextrb $12, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_26 +; AVX512F-NEXT: ## BB#25: ## %cond.load34 +; AVX512F-NEXT: vpinsrb $12, 12(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_26: ## %else35 +; AVX512F-NEXT: vpextrb $13, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_28 +; AVX512F-NEXT: ## BB#27: ## %cond.load37 +; AVX512F-NEXT: vpinsrb $13, 13(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_28: ## %else38 +; AVX512F-NEXT: vpextrb $14, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_30 +; AVX512F-NEXT: ## BB#29: ## %cond.load40 +; AVX512F-NEXT: vpinsrb $14, 14(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_30: ## %else41 +; AVX512F-NEXT: vpextrb $15, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_32 +; AVX512F-NEXT: ## BB#31: ## %cond.load43 +; AVX512F-NEXT: vpinsrb $15, 15(%rdi), %xmm1, %xmm2 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVX512F-NEXT: LBB51_32: ## %else44 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX512F-NEXT: vpextrb $0, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_34 +; AVX512F-NEXT: ## BB#33: ## %cond.load46 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $0, 16(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_34: ## %else47 +; AVX512F-NEXT: vpextrb $1, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_36 +; AVX512F-NEXT: ## BB#35: ## %cond.load49 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $1, 17(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_36: ## %else50 +; AVX512F-NEXT: vpextrb $2, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_38 +; AVX512F-NEXT: ## BB#37: ## %cond.load52 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $2, 18(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; 
AVX512F-NEXT: LBB51_38: ## %else53 +; AVX512F-NEXT: vpextrb $3, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_40 +; AVX512F-NEXT: ## BB#39: ## %cond.load55 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $3, 19(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_40: ## %else56 +; AVX512F-NEXT: vpextrb $4, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_42 +; AVX512F-NEXT: ## BB#41: ## %cond.load58 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $4, 20(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_42: ## %else59 +; AVX512F-NEXT: vpextrb $5, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_44 +; AVX512F-NEXT: ## BB#43: ## %cond.load61 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $5, 21(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_44: ## %else62 +; AVX512F-NEXT: vpextrb $6, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_46 +; AVX512F-NEXT: ## BB#45: ## %cond.load64 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $6, 22(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_46: ## %else65 +; AVX512F-NEXT: vpextrb $7, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_48 +; AVX512F-NEXT: ## BB#47: ## %cond.load67 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $7, 23(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_48: ## %else68 +; AVX512F-NEXT: vpextrb $8, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_50 +; AVX512F-NEXT: ## BB#49: ## %cond.load70 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $8, 24(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_50: ## %else71 +; AVX512F-NEXT: vpextrb $9, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_52 +; AVX512F-NEXT: ## BB#51: ## %cond.load73 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $9, 25(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_52: ## %else74 +; AVX512F-NEXT: vpextrb $10, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_54 +; AVX512F-NEXT: ## BB#53: ## %cond.load76 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $10, 26(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_54: ## %else77 +; AVX512F-NEXT: vpextrb $11, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_56 +; AVX512F-NEXT: ## BB#55: ## %cond.load79 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $11, 27(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_56: ## %else80 +; AVX512F-NEXT: vpextrb $12, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_58 +; AVX512F-NEXT: ## BB#57: ## %cond.load82 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $12, 28(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_58: ## %else83 +; AVX512F-NEXT: vpextrb $13, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_60 +; AVX512F-NEXT: ## BB#59: ## %cond.load85 +; AVX512F-NEXT: 
vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $13, 29(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_60: ## %else86 +; AVX512F-NEXT: vpextrb $14, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_62 +; AVX512F-NEXT: ## BB#61: ## %cond.load88 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpinsrb $14, 30(%rdi), %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_62: ## %else89 +; AVX512F-NEXT: vpextrb $15, %xmm2, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB51_64 +; AVX512F-NEXT: ## BB#63: ## %cond.load91 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpinsrb $15, 31(%rdi), %xmm2, %xmm2 +; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 +; AVX512F-NEXT: LBB51_64: ## %else92 +; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpxor %ymm2, %ymm2, %ymm2 +; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0 +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; ; SKX-LABEL: test_mask_load_32xi8: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 @@ -2245,6 +3260,2278 @@ define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x declare <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>*, i32, <32 x i1>, <32 x i8>) define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) { +; AVX1-LABEL: test_mask_load_64xi8: +; AVX1: ## BB#0: +; AVX1-NEXT: pushq %rbp +; AVX1-NEXT: Ltmp3: +; AVX1-NEXT: .cfi_def_cfa_offset 16 +; AVX1-NEXT: pushq %r15 +; AVX1-NEXT: Ltmp4: +; AVX1-NEXT: .cfi_def_cfa_offset 24 +; AVX1-NEXT: pushq %r14 +; AVX1-NEXT: Ltmp5: +; AVX1-NEXT: .cfi_def_cfa_offset 32 +; AVX1-NEXT: pushq %r13 +; AVX1-NEXT: Ltmp6: +; AVX1-NEXT: .cfi_def_cfa_offset 40 +; AVX1-NEXT: pushq %r12 +; AVX1-NEXT: Ltmp7: +; AVX1-NEXT: .cfi_def_cfa_offset 48 +; AVX1-NEXT: pushq %rbx +; AVX1-NEXT: Ltmp8: +; AVX1-NEXT: .cfi_def_cfa_offset 56 +; AVX1-NEXT: pushq %rax +; AVX1-NEXT: Ltmp9: +; AVX1-NEXT: .cfi_def_cfa_offset 64 +; AVX1-NEXT: Ltmp10: +; AVX1-NEXT: .cfi_offset %rbx, -56 +; AVX1-NEXT: Ltmp11: +; AVX1-NEXT: .cfi_offset %r12, -48 +; AVX1-NEXT: Ltmp12: +; AVX1-NEXT: .cfi_offset %r13, -40 +; AVX1-NEXT: Ltmp13: +; AVX1-NEXT: .cfi_offset %r14, -32 +; AVX1-NEXT: Ltmp14: +; AVX1-NEXT: .cfi_offset %r15, -24 +; AVX1-NEXT: Ltmp15: +; AVX1-NEXT: .cfi_offset %rbp, -16 +; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax +; AVX1-NEXT: movl %edi, %r13d +; AVX1-NEXT: testb $1, %dil +; AVX1-NEXT: je LBB52_2 +; AVX1-NEXT: ## BB#1: ## %cond.load +; AVX1-NEXT: movzbl (%rax), %ebp +; AVX1-NEXT: vmovd %ebp, %xmm9 +; AVX1-NEXT: LBB52_2: ## %else +; AVX1-NEXT: testb $1, %sil +; AVX1-NEXT: je LBB52_4 +; AVX1-NEXT: ## BB#3: ## %cond.load1 +; AVX1-NEXT: vpinsrb $1, 1(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_4: ## %else2 +; AVX1-NEXT: testb $1, %dl +; AVX1-NEXT: je LBB52_6 +; AVX1-NEXT: ## BB#5: ## %cond.load4 +; AVX1-NEXT: vpinsrb $2, 2(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_6: ## %else5 +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB52_8 +; AVX1-NEXT: ## BB#7: ## %cond.load7 +; AVX1-NEXT: vpinsrb $3, 3(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_8: ## %else8 +; AVX1-NEXT: testb $1, %r8b +; AVX1-NEXT: je LBB52_10 +; AVX1-NEXT: ## BB#9: ## %cond.load10 +; AVX1-NEXT: vpinsrb $4, 
4(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_10: ## %else11 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; AVX1-NEXT: testb $1, %r9b +; AVX1-NEXT: je LBB52_12 +; AVX1-NEXT: ## BB#11: ## %cond.load13 +; AVX1-NEXT: vpinsrb $5, 5(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_12: ## %else14 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; AVX1-NEXT: testb $1, %r10b +; AVX1-NEXT: je LBB52_14 +; AVX1-NEXT: ## BB#13: ## %cond.load16 +; AVX1-NEXT: vpinsrb $6, 6(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_14: ## %else17 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; AVX1-NEXT: testb $1, %r11b +; AVX1-NEXT: je LBB52_16 +; AVX1-NEXT: ## BB#15: ## %cond.load19 +; AVX1-NEXT: vpinsrb $7, 7(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_16: ## %else20 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; AVX1-NEXT: testb $1, %r14b +; AVX1-NEXT: je LBB52_18 +; AVX1-NEXT: ## BB#17: ## %cond.load22 +; AVX1-NEXT: vpinsrb $8, 8(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_18: ## %else23 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; AVX1-NEXT: testb $1, %r15b +; AVX1-NEXT: je LBB52_20 +; AVX1-NEXT: ## BB#19: ## %cond.load25 +; AVX1-NEXT: vpinsrb $9, 9(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_20: ## %else26 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dil +; AVX1-NEXT: testb $1, %r12b +; AVX1-NEXT: je LBB52_22 +; AVX1-NEXT: ## BB#21: ## %cond.load28 +; AVX1-NEXT: vpinsrb $10, 10(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_22: ## %else29 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; AVX1-NEXT: testb $1, %dil +; AVX1-NEXT: je LBB52_24 +; AVX1-NEXT: ## BB#23: ## %cond.load31 +; AVX1-NEXT: vpinsrb $11, 11(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_24: ## %else32 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %bl +; AVX1-NEXT: testb $1, %bpl +; AVX1-NEXT: je LBB52_26 +; AVX1-NEXT: ## BB#25: ## %cond.load34 +; AVX1-NEXT: vpinsrb $12, 12(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_26: ## %else35 +; AVX1-NEXT: testb $1, %bl +; AVX1-NEXT: je LBB52_28 +; AVX1-NEXT: ## BB#27: ## %cond.load37 +; AVX1-NEXT: vpinsrb $13, 13(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_28: ## %else38 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_30 +; AVX1-NEXT: ## BB#29: ## %cond.load40 +; AVX1-NEXT: vpinsrb $14, 14(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_30: ## %else41 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_32 +; AVX1-NEXT: ## BB#31: ## %cond.load43 +; AVX1-NEXT: vpinsrb $15, 15(%rax), %xmm9, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm9 = ymm3[0,1,2,3],ymm9[4,5,6,7] +; AVX1-NEXT: LBB52_32: ## %else44 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_34 +; AVX1-NEXT: ## BB#33: ## %cond.load46 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $0, 16(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_34: ## %else47 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_36 +; 
AVX1-NEXT: ## BB#35: ## %cond.load49 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $1, 17(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_36: ## %else50 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_38 +; AVX1-NEXT: ## BB#37: ## %cond.load52 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $2, 18(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_38: ## %else53 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_40 +; AVX1-NEXT: ## BB#39: ## %cond.load55 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $3, 19(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_40: ## %else56 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_42 +; AVX1-NEXT: ## BB#41: ## %cond.load58 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $4, 20(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_42: ## %else59 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_44 +; AVX1-NEXT: ## BB#43: ## %cond.load61 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $5, 21(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_44: ## %else62 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_46 +; AVX1-NEXT: ## BB#45: ## %cond.load64 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $6, 22(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_46: ## %else65 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_48 +; AVX1-NEXT: ## BB#47: ## %cond.load67 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $7, 23(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_48: ## %else68 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_50 +; AVX1-NEXT: ## BB#49: ## %cond.load70 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $8, 24(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_50: ## %else71 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_52 +; AVX1-NEXT: ## BB#51: ## %cond.load73 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $9, 25(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_52: ## %else74 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_54 +; AVX1-NEXT: ## BB#53: ## %cond.load76 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $10, 26(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_54: ## %else77 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_56 +; AVX1-NEXT: ## BB#55: ## %cond.load79 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $11, 27(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_56: ## %else80 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_58 +; AVX1-NEXT: ## BB#57: ## %cond.load82 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $12, 28(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_58: ## %else83 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_60 +; AVX1-NEXT: ## BB#59: ## %cond.load85 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $13, 29(%rax), %xmm3, %xmm3 
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_60: ## %else86 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_62 +; AVX1-NEXT: ## BB#61: ## %cond.load88 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $14, 30(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_62: ## %else89 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_64 +; AVX1-NEXT: ## BB#63: ## %cond.load91 +; AVX1-NEXT: vextractf128 $1, %ymm9, %xmm3 +; AVX1-NEXT: vpinsrb $15, 31(%rax), %xmm3, %xmm3 +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm9, %ymm9 +; AVX1-NEXT: LBB52_64: ## %else92 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_66 +; AVX1-NEXT: ## BB#65: ## %cond.load94 +; AVX1-NEXT: vpinsrb $0, 32(%rax), %xmm0, %xmm3 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm0[4,5,6,7] +; AVX1-NEXT: LBB52_66: ## %else95 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_68 +; AVX1-NEXT: ## BB#67: ## %cond.load97 +; AVX1-NEXT: vpinsrb $1, 33(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_68: ## %else98 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_70 +; AVX1-NEXT: ## BB#69: ## %cond.load100 +; AVX1-NEXT: vpinsrb $2, 34(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_70: ## %else101 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_72 +; AVX1-NEXT: ## BB#71: ## %cond.load103 +; AVX1-NEXT: vpinsrb $3, 35(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_72: ## %else104 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_74 +; AVX1-NEXT: ## BB#73: ## %cond.load106 +; AVX1-NEXT: vpinsrb $4, 36(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_74: ## %else107 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_76 +; AVX1-NEXT: ## BB#75: ## %cond.load109 +; AVX1-NEXT: vpinsrb $5, 37(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_76: ## %else110 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_78 +; AVX1-NEXT: ## BB#77: ## %cond.load112 +; AVX1-NEXT: vpinsrb $6, 38(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_78: ## %else113 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_80 +; AVX1-NEXT: ## BB#79: ## %cond.load115 +; AVX1-NEXT: vpinsrb $7, 39(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_80: ## %else116 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_82 +; AVX1-NEXT: ## BB#81: ## %cond.load118 +; AVX1-NEXT: vpinsrb $8, 40(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_82: ## %else119 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_84 +; AVX1-NEXT: ## BB#83: ## %cond.load121 +; AVX1-NEXT: vpinsrb $9, 41(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_84: ## %else122 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) +; AVX1-NEXT: je LBB52_86 +; AVX1-NEXT: ## BB#85: ## %cond.load124 +; AVX1-NEXT: vpinsrb $10, 42(%rax), %xmm3, %xmm4 +; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX1-NEXT: LBB52_86: ## %else125 +; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp) 
+; AVX1-NEXT: je LBB52_88
+; AVX1-NEXT: ## BB#87: ## %cond.load127
+; AVX1-NEXT: vpinsrb $11, 43(%rax), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB52_88: ## %else128
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_90
+; AVX1-NEXT: ## BB#89: ## %cond.load130
+; AVX1-NEXT: vpinsrb $12, 44(%rax), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB52_90: ## %else131
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_92
+; AVX1-NEXT: ## BB#91: ## %cond.load133
+; AVX1-NEXT: vpinsrb $13, 45(%rax), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB52_92: ## %else134
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_94
+; AVX1-NEXT: ## BB#93: ## %cond.load136
+; AVX1-NEXT: vpinsrb $14, 46(%rax), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB52_94: ## %else137
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_96
+; AVX1-NEXT: ## BB#95: ## %cond.load139
+; AVX1-NEXT: vpinsrb $15, 47(%rax), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB52_96: ## %else140
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_98
+; AVX1-NEXT: ## BB#97: ## %cond.load142
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $0, 48(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_98: ## %else143
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_100
+; AVX1-NEXT: ## BB#99: ## %cond.load145
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $1, 49(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_100: ## %else146
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_102
+; AVX1-NEXT: ## BB#101: ## %cond.load148
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $2, 50(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_102: ## %else149
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_104
+; AVX1-NEXT: ## BB#103: ## %cond.load151
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $3, 51(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_104: ## %else152
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_106
+; AVX1-NEXT: ## BB#105: ## %cond.load154
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $4, 52(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_106: ## %else155
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_108
+; AVX1-NEXT: ## BB#107: ## %cond.load157
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $5, 53(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_108: ## %else158
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_110
+; AVX1-NEXT: ## BB#109: ## %cond.load160
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $6, 54(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_110: ## %else161
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_112
+; AVX1-NEXT: ## BB#111: ## %cond.load163
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $7, 55(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_112: ## %else164
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_114
+; AVX1-NEXT: ## BB#113: ## %cond.load166
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $8, 56(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_114: ## %else167
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_116
+; AVX1-NEXT: ## BB#115: ## %cond.load169
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $9, 57(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_116: ## %else170
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_118
+; AVX1-NEXT: ## BB#117: ## %cond.load172
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $10, 58(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_118: ## %else173
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_120
+; AVX1-NEXT: ## BB#119: ## %cond.load175
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $11, 59(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_120: ## %else176
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_122
+; AVX1-NEXT: ## BB#121: ## %cond.load178
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $12, 60(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_122: ## %else179
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_124
+; AVX1-NEXT: ## BB#123: ## %cond.load181
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $13, 61(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_124: ## %else182
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: je LBB52_126
+; AVX1-NEXT: ## BB#125: ## %cond.load184
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $14, 62(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_126: ## %else185
+; AVX1-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX1-NEXT: movl %r9d, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movl %r8d, (%rsp) ## 4-byte Spill
+; AVX1-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movl %esi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: je LBB52_128
+; AVX1-NEXT: ## BB#127: ## %cond.load187
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrb $15, 63(%rax), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB52_128: ## %else188
+; AVX1-NEXT: movzbl %r10b, %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl %r11b, %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl %r14b, %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl %r15b, %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl %r12b, %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl %dil, %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl %bpl, %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl %bl, %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
+; AVX1-NEXT: movzbl %r13b, %r13d
+; AVX1-NEXT: vmovd %r13d, %xmm4
+; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %edi ## 4-byte Reload
+; AVX1-NEXT: movzbl %dil, %ebp
+; AVX1-NEXT: vpinsrb $1, %ebp, %xmm4, %xmm4
+; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
+; AVX1-NEXT: movzbl %bpl, %ebp
+; AVX1-NEXT: vpinsrb $2, %ebp, %xmm4, %xmm4
+; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
+; AVX1-NEXT: movzbl %bpl, %ebp
+; AVX1-NEXT: vpinsrb $3, %ebp, %xmm4, %xmm4
+; AVX1-NEXT: movl (%rsp), %ebp ## 4-byte Reload
+; AVX1-NEXT: movzbl %bpl, %ebp
+; AVX1-NEXT: vpinsrb $4, %ebp, %xmm4, %xmm4
+; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
+; AVX1-NEXT: movzbl %bpl, %ebp
+; AVX1-NEXT: vpinsrb $5, %ebp, %xmm4, %xmm4
+; AVX1-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX1-NEXT: vmovd -{{[0-9]+}}(%rsp), %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: ## xmm5 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm5, %xmm8 ## 4-byte Folded Reload
+; AVX1-NEXT: vmovd -{{[0-9]+}}(%rsp), %xmm6 ## 4-byte Folded Reload
+; AVX1-NEXT: ## xmm6 = mem[0],zero,zero,zero
+; AVX1-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX1-NEXT: vpinsrb $2, %r12d, %xmm6, %xmm6
+; AVX1-NEXT: vpinsrb $3, %r15d, %xmm6, %xmm6
+; AVX1-NEXT: vpinsrb $4, %r14d, %xmm6, %xmm6
+; AVX1-NEXT: vpinsrb $5, %r11d, %xmm6, %xmm6
+; AVX1-NEXT: vpinsrb $6, %r8d, %xmm6, %xmm6
+; AVX1-NEXT: vpinsrb $7, %edx, %xmm6, %xmm6
+; AVX1-NEXT: vpinsrb $8, %eax, %xmm6, %xmm6
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; AVX1-NEXT: vpinsrb $9, %ecx, %xmm6, %xmm6
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; AVX1-NEXT: vpinsrb $10, %esi, %xmm6, %xmm6
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; AVX1-NEXT: vpinsrb $11, %r9d, %xmm6, %xmm6
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX1-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; AVX1-NEXT: vpinsrb $13, %ebx, %xmm6, %xmm6
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX1-NEXT: vpinsrb $14, %r13d, %xmm6, %xmm6
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
+; AVX1-NEXT: vpinsrb $15, %r14d, %xmm6, %xmm10
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
+; AVX1-NEXT: vmovd %edi, %xmm7
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; AVX1-NEXT: vpinsrb $1, %r11d, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $2, %r15d, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $3, %r12d, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $4, %r8d, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $5, %ecx, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $6, %r9d, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $7, %esi, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $8, %r10d, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $9, %eax, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $10, %r13d, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $11, %edx, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $12, %r14d, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $13, %ebx, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $14, %edi, %xmm7, %xmm7
+; AVX1-NEXT: vpinsrb $15, %ebp, %xmm7, %xmm7
+; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpcmpgtb %xmm4, %xmm2, %xmm4
+; AVX1-NEXT: vpsllw $7, %xmm8, %xmm6
+; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtb %xmm6, %xmm2, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4
+; AVX1-NEXT: vandnps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT: vandps %ymm4, %ymm9, %ymm4
+; AVX1-NEXT: vorps %ymm0, %ymm4, %ymm0
+; AVX1-NEXT: vpsllw $7, %xmm10, %xmm4
+; AVX1-NEXT: vpand %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtb %xmm4, %xmm2, %xmm4
+; AVX1-NEXT: vpsllw $7, %xmm7, %xmm6
+; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm5
+; AVX1-NEXT: vpcmpgtb %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
+; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm2, %ymm3, %ymm2
+; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
+; AVX1-NEXT: addq $8, %rsp
+; AVX1-NEXT: popq %rbx
+; AVX1-NEXT: popq %r12
+; AVX1-NEXT: popq %r13
+; AVX1-NEXT: popq %r14
+; AVX1-NEXT: popq %r15
+; AVX1-NEXT: popq %rbp
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_mask_load_64xi8:
+; AVX2: ## BB#0:
+; AVX2-NEXT: pushq %rbp
+; AVX2-NEXT: Ltmp3:
+; AVX2-NEXT: .cfi_def_cfa_offset 16
+; AVX2-NEXT: pushq %r15
+; AVX2-NEXT: Ltmp4:
+; AVX2-NEXT: .cfi_def_cfa_offset 24
+; AVX2-NEXT: pushq %r14
+; AVX2-NEXT: Ltmp5:
+; AVX2-NEXT: .cfi_def_cfa_offset 32
+; AVX2-NEXT: pushq %r13
+; AVX2-NEXT: Ltmp6:
+; AVX2-NEXT: .cfi_def_cfa_offset 40
+; AVX2-NEXT: pushq %r12
+; AVX2-NEXT: Ltmp7:
+; AVX2-NEXT: .cfi_def_cfa_offset 48
+; AVX2-NEXT: pushq %rbx
+; AVX2-NEXT: Ltmp8:
+; AVX2-NEXT: .cfi_def_cfa_offset 56
+; AVX2-NEXT: pushq %rax
+; AVX2-NEXT: Ltmp9:
+; AVX2-NEXT: .cfi_def_cfa_offset 64
+; AVX2-NEXT: Ltmp10:
+; AVX2-NEXT: .cfi_offset %rbx, -56
+; AVX2-NEXT: Ltmp11:
+; AVX2-NEXT: .cfi_offset %r12, -48
+; AVX2-NEXT: Ltmp12:
+; AVX2-NEXT: .cfi_offset %r13, -40
+; AVX2-NEXT: Ltmp13:
+; AVX2-NEXT: .cfi_offset %r14, -32
+; AVX2-NEXT: Ltmp14:
+; AVX2-NEXT: .cfi_offset %r15, -24
+; AVX2-NEXT: Ltmp15:
+; AVX2-NEXT: .cfi_offset %rbp, -16
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: testb $1, %dil
+; AVX2-NEXT: je LBB52_2
+; AVX2-NEXT: ## BB#1: ## %cond.load
+; AVX2-NEXT: movzbl (%rax), %ebp
+; AVX2-NEXT: vmovd %ebp, %xmm2
+; AVX2-NEXT: LBB52_2: ## %else
+; AVX2-NEXT: testb $1, %sil
+; AVX2-NEXT: je LBB52_4
+; AVX2-NEXT: ## BB#3: ## %cond.load1
+; AVX2-NEXT: vpinsrb $1, 1(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_4: ## %else2
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB52_6
+; AVX2-NEXT: ## BB#5: ## %cond.load4
+; AVX2-NEXT: vpinsrb $2, 2(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_6: ## %else5
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB52_8
+; AVX2-NEXT: ## BB#7: ## %cond.load7
+; AVX2-NEXT: vpinsrb $3, 3(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_8: ## %else8
+; AVX2-NEXT: testb $1, %r8b
+; AVX2-NEXT: je LBB52_10
+; AVX2-NEXT: ## BB#9: ## %cond.load10
+; AVX2-NEXT: vpinsrb $4, 4(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_10: ## %else11
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r10b
+; AVX2-NEXT: testb $1, %r9b
+; AVX2-NEXT: je LBB52_12
+; AVX2-NEXT: ## BB#11: ## %cond.load13
+; AVX2-NEXT: vpinsrb $5, 5(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_12: ## %else14
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r11b
+; AVX2-NEXT: testb $1, %r10b
+; AVX2-NEXT: je LBB52_14
+; AVX2-NEXT: ## BB#13: ## %cond.load16
+; AVX2-NEXT: vpinsrb $6, 6(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_14: ## %else17
+; AVX2-NEXT: testb $1, %r11b
+; AVX2-NEXT: je LBB52_16
+; AVX2-NEXT: ## BB#15: ## %cond.load19
+; AVX2-NEXT: vpinsrb $7, 7(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_16: ## %else20
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_18
+; AVX2-NEXT: ## BB#17: ## %cond.load22
+; AVX2-NEXT: vpinsrb $8, 8(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_18: ## %else23
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_20
+; AVX2-NEXT: ## BB#19: ## %cond.load25
+; AVX2-NEXT: vpinsrb $9, 9(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_20: ## %else26
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_22
+; AVX2-NEXT: ## BB#21: ## %cond.load28
+; AVX2-NEXT: vpinsrb $10, 10(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_22: ## %else29
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %bpl
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_24
+; AVX2-NEXT: ## BB#23: ## %cond.load31
+; AVX2-NEXT: vpinsrb $11, 11(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_24: ## %else32
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %bl
+; AVX2-NEXT: testb $1, %bpl
+; AVX2-NEXT: je LBB52_26
+; AVX2-NEXT: ## BB#25: ## %cond.load34
+; AVX2-NEXT: vpinsrb $12, 12(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_26: ## %else35
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r14b
+; AVX2-NEXT: testb $1, %bl
+; AVX2-NEXT: je LBB52_28
+; AVX2-NEXT: ## BB#27: ## %cond.load37
+; AVX2-NEXT: vpinsrb $13, 13(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_28: ## %else38
+; AVX2-NEXT: testb $1, %r14b
+; AVX2-NEXT: je LBB52_30
+; AVX2-NEXT: ## BB#29: ## %cond.load40
+; AVX2-NEXT: vpinsrb $14, 14(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_30: ## %else41
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r13b
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_32
+; AVX2-NEXT: ## BB#31: ## %cond.load43
+; AVX2-NEXT: vpinsrb $15, 15(%rax), %xmm2, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; AVX2-NEXT: LBB52_32: ## %else44
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r12b
+; AVX2-NEXT: testb $1, %r13b
+; AVX2-NEXT: je LBB52_34
+; AVX2-NEXT: ## BB#33: ## %cond.load46
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $0, 16(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_34: ## %else47
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %r15b
+; AVX2-NEXT: testb $1, %r12b
+; AVX2-NEXT: je LBB52_36
+; AVX2-NEXT: ## BB#35: ## %cond.load49
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $1, 17(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_36: ## %else50
+; AVX2-NEXT: testb $1, %r15b
+; AVX2-NEXT: je LBB52_38
+; AVX2-NEXT: ## BB#37: ## %cond.load52
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $2, 18(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_38: ## %else53
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_40
+; AVX2-NEXT: ## BB#39: ## %cond.load55
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $3, 19(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_40: ## %else56
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_42
+; AVX2-NEXT: ## BB#41: ## %cond.load58
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $4, 20(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_42: ## %else59
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_44
+; AVX2-NEXT: ## BB#43: ## %cond.load61
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $5, 21(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_44: ## %else62
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_46
+; AVX2-NEXT: ## BB#45: ## %cond.load64
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $6, 22(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_46: ## %else65
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_48
+; AVX2-NEXT: ## BB#47: ## %cond.load67
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $7, 23(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_48: ## %else68
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_50
+; AVX2-NEXT: ## BB#49: ## %cond.load70
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $8, 24(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_50: ## %else71
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_52
+; AVX2-NEXT: ## BB#51: ## %cond.load73
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $9, 25(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_52: ## %else74
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_54
+; AVX2-NEXT: ## BB#53: ## %cond.load76
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $10, 26(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_54: ## %else77
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_56
+; AVX2-NEXT: ## BB#55: ## %cond.load79
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $11, 27(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_56: ## %else80
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_58
+; AVX2-NEXT: ## BB#57: ## %cond.load82
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $12, 28(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_58: ## %else83
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_60
+; AVX2-NEXT: ## BB#59: ## %cond.load85
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $13, 29(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_60: ## %else86
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_62
+; AVX2-NEXT: ## BB#61: ## %cond.load88
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $14, 30(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_62: ## %else89
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_64
+; AVX2-NEXT: ## BB#63: ## %cond.load91
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX2-NEXT: vpinsrb $15, 31(%rax), %xmm3, %xmm3
+; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX2-NEXT: LBB52_64: ## %else92
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_66
+; AVX2-NEXT: ## BB#65: ## %cond.load94
+; AVX2-NEXT: vpinsrb $0, 32(%rax), %xmm0, %xmm3
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT: LBB52_66: ## %else95
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_68
+; AVX2-NEXT: ## BB#67: ## %cond.load97
+; AVX2-NEXT: vpinsrb $1, 33(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_68: ## %else98
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_70
+; AVX2-NEXT: ## BB#69: ## %cond.load100
+; AVX2-NEXT: vpinsrb $2, 34(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_70: ## %else101
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_72
+; AVX2-NEXT: ## BB#71: ## %cond.load103
+; AVX2-NEXT: vpinsrb $3, 35(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_72: ## %else104
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_74
+; AVX2-NEXT: ## BB#73: ## %cond.load106
+; AVX2-NEXT: vpinsrb $4, 36(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_74: ## %else107
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_76
+; AVX2-NEXT: ## BB#75: ## %cond.load109
+; AVX2-NEXT: vpinsrb $5, 37(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_76: ## %else110
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_78
+; AVX2-NEXT: ## BB#77: ## %cond.load112
+; AVX2-NEXT: vpinsrb $6, 38(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_78: ## %else113
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_80
+; AVX2-NEXT: ## BB#79: ## %cond.load115
+; AVX2-NEXT: vpinsrb $7, 39(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_80: ## %else116
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_82
+; AVX2-NEXT: ## BB#81: ## %cond.load118
+; AVX2-NEXT: vpinsrb $8, 40(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_82: ## %else119
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_84
+; AVX2-NEXT: ## BB#83: ## %cond.load121
+; AVX2-NEXT: vpinsrb $9, 41(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_84: ## %else122
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_86
+; AVX2-NEXT: ## BB#85: ## %cond.load124
+; AVX2-NEXT: vpinsrb $10, 42(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_86: ## %else125
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_88
+; AVX2-NEXT: ## BB#87: ## %cond.load127
+; AVX2-NEXT: vpinsrb $11, 43(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_88: ## %else128
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_90
+; AVX2-NEXT: ## BB#89: ## %cond.load130
+; AVX2-NEXT: vpinsrb $12, 44(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_90: ## %else131
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_92
+; AVX2-NEXT: ## BB#91: ## %cond.load133
+; AVX2-NEXT: vpinsrb $13, 45(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_92: ## %else134
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_94
+; AVX2-NEXT: ## BB#93: ## %cond.load136
+; AVX2-NEXT: vpinsrb $14, 46(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_94: ## %else137
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_96
+; AVX2-NEXT: ## BB#95: ## %cond.load139
+; AVX2-NEXT: vpinsrb $15, 47(%rax), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB52_96: ## %else140
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_98
+; AVX2-NEXT: ## BB#97: ## %cond.load142
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $0, 48(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_98: ## %else143
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_100
+; AVX2-NEXT: ## BB#99: ## %cond.load145
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $1, 49(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_100: ## %else146
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_102
+; AVX2-NEXT: ## BB#101: ## %cond.load148
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $2, 50(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_102: ## %else149
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_104
+; AVX2-NEXT: ## BB#103: ## %cond.load151
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $3, 51(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_104: ## %else152
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_106
+; AVX2-NEXT: ## BB#105: ## %cond.load154
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $4, 52(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_106: ## %else155
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_108
+; AVX2-NEXT: ## BB#107: ## %cond.load157
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $5, 53(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_108: ## %else158
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_110
+; AVX2-NEXT: ## BB#109: ## %cond.load160
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $6, 54(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_110: ## %else161
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_112
+; AVX2-NEXT: ## BB#111: ## %cond.load163
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $7, 55(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_112: ## %else164
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_114
+; AVX2-NEXT: ## BB#113: ## %cond.load166
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $8, 56(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_114: ## %else167
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_116
+; AVX2-NEXT: ## BB#115: ## %cond.load169
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $9, 57(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_116: ## %else170
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_118
+; AVX2-NEXT: ## BB#117: ## %cond.load172
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $10, 58(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_118: ## %else173
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_120
+; AVX2-NEXT: ## BB#119: ## %cond.load175
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $11, 59(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_120: ## %else176
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_122
+; AVX2-NEXT: ## BB#121: ## %cond.load178
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $12, 60(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_122: ## %else179
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: je LBB52_124
+; AVX2-NEXT: ## BB#123: ## %cond.load181
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $13, 61(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_124: ## %else182
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: jne LBB52_126
+; AVX2-NEXT: ## BB#125:
+; AVX2-NEXT: movq %rax, %rdi
+; AVX2-NEXT: jmp LBB52_127
+; AVX2-NEXT: LBB52_126: ## %cond.load184
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: movq %rax, %rdi
+; AVX2-NEXT: vpinsrb $14, 62(%rax), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_127: ## %else185
+; AVX2-NEXT: movl %ebp, %eax
+; AVX2-NEXT: testb $1, {{[0-9]+}}(%rsp)
+; AVX2-NEXT: movl %r9d, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movl %r8d, (%rsp) ## 4-byte Spill
+; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movl %esi, %ebp
+; AVX2-NEXT: je LBB52_129
+; AVX2-NEXT: ## BB#128: ## %cond.load187
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpinsrb $15, 63(%rdi), %xmm4, %xmm4
+; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
+; AVX2-NEXT: LBB52_129: ## %else188
+; AVX2-NEXT: movzbl %r10b, %ecx
+; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl %r11b, %ecx
+; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX2-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl %al, %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl %bl, %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl %r14b, %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl %r12b, %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl %r13b, %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl %r15b, %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; AVX2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %edi ## 4-byte Reload
+; AVX2-NEXT: movzbl %dil, %r13d
+; AVX2-NEXT: vmovd %r13d, %xmm4
+; AVX2-NEXT: movzbl %bpl, %ebp
+; AVX2-NEXT: vpinsrb $1, %ebp, %xmm4, %xmm4
+; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
+; AVX2-NEXT: movzbl %bpl, %ebp
+; AVX2-NEXT: vpinsrb $2, %ebp, %xmm4, %xmm4
+; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
+; AVX2-NEXT: movzbl %bpl, %ebp
+; AVX2-NEXT: vpinsrb $3, %ebp, %xmm4, %xmm4
+; AVX2-NEXT: movl (%rsp), %ebp ## 4-byte Reload
+; AVX2-NEXT: movzbl %bpl, %ebp
+; AVX2-NEXT: vpinsrb $4, %ebp, %xmm4, %xmm4
+; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
+; AVX2-NEXT: movzbl %bpl, %ebp
+; AVX2-NEXT: vpinsrb $5, %ebp, %xmm4, %xmm4
+; AVX2-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm4, %xmm4 ## 4-byte Folded Reload
+; AVX2-NEXT: vmovd -{{[0-9]+}}(%rsp), %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: ## xmm5 = mem[0],zero,zero,zero
+; AVX2-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm5, %xmm5 ## 4-byte Folded Reload
+; AVX2-NEXT: vmovd %r12d, %xmm6
+; AVX2-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX2-NEXT: vpinsrb $2, %r15d, %xmm6, %xmm6
+; AVX2-NEXT: vpinsrb $3, %r14d, %xmm6, %xmm6
+; AVX2-NEXT: vpinsrb $4, %ebx, %xmm6, %xmm6
+; AVX2-NEXT: vpinsrb $5, %r11d, %xmm6, %xmm6
+; AVX2-NEXT: vpinsrb $6, %r9d, %xmm6, %xmm6
+; AVX2-NEXT: vpinsrb $7, %esi, %xmm6, %xmm6
+; AVX2-NEXT: vpinsrb $8, %eax, %xmm6, %xmm6
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; AVX2-NEXT: vpinsrb $9, %ecx, %xmm6, %xmm6
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; AVX2-NEXT: vpinsrb $10, %edx, %xmm6, %xmm6
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
+; AVX2-NEXT: vpinsrb $11, %r8d, %xmm6, %xmm6
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
+; AVX2-NEXT: vpinsrb $12, %r10d, %xmm6, %xmm6
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
+; AVX2-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
+; AVX2-NEXT: vpinsrb $14, %eax, %xmm6, %xmm6
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
+; AVX2-NEXT: vpinsrb $15, %r15d, %xmm6, %xmm6
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
+; AVX2-NEXT: vmovd %r12d, %xmm7
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
+; AVX2-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
+; AVX2-NEXT: vpinsrb $1, %r9d, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $2, %r11d, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $3, %r14d, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $4, %r13d, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $5, %ecx, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $6, %r8d, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $7, %ebx, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $8, %r10d, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $9, %ebp, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $10, %eax, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $11, %edi, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $12, %r15d, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $13, %esi, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $14, %r12d, %xmm7, %xmm7
+; AVX2-NEXT: vpinsrb $15, %edx, %xmm7, %xmm7
+; AVX2-NEXT: vinserti128 $1, %xmm5, %ymm4, %ymm4
+; AVX2-NEXT: vpsllw $7, %ymm4, %ymm4
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX2-NEXT: vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vinserti128 $1, %xmm7, %ymm6, %ymm2
+; AVX2-NEXT: vpsllw $7, %ymm2, %ymm2
+; AVX2-NEXT: vpand %ymm5, %ymm2, %ymm2
+; AVX2-NEXT: vpblendvb %ymm2, %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: addq $8, %rsp
+; AVX2-NEXT: popq %rbx
+; AVX2-NEXT: popq %r12
+; AVX2-NEXT: popq %r13
+; AVX2-NEXT: popq %r14
+; AVX2-NEXT: popq %r15
+; AVX2-NEXT: popq %rbp
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_mask_load_64xi8:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: pushq %rbp
+; AVX512F-NEXT: Ltmp0:
+; AVX512F-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-NEXT: pushq %r15
+; AVX512F-NEXT: Ltmp1:
+; AVX512F-NEXT: .cfi_def_cfa_offset 24
+; AVX512F-NEXT: pushq %r14
+; AVX512F-NEXT: Ltmp2:
+; AVX512F-NEXT: .cfi_def_cfa_offset 32
+; AVX512F-NEXT: pushq %r13
+; AVX512F-NEXT: Ltmp3:
+; AVX512F-NEXT: .cfi_def_cfa_offset 40
+; AVX512F-NEXT: pushq %r12
+; AVX512F-NEXT: Ltmp4:
+; AVX512F-NEXT: .cfi_def_cfa_offset 48
+; AVX512F-NEXT: pushq %rbx
+; AVX512F-NEXT: Ltmp5:
+; AVX512F-NEXT: .cfi_def_cfa_offset 56
+; AVX512F-NEXT: subq $76, %rsp
+; AVX512F-NEXT: Ltmp6:
+; AVX512F-NEXT: .cfi_def_cfa_offset 132
+; AVX512F-NEXT: Ltmp7:
+; AVX512F-NEXT: .cfi_offset %rbx, -56
+; AVX512F-NEXT: Ltmp8:
+; AVX512F-NEXT: .cfi_offset %r12, -48
+; AVX512F-NEXT: Ltmp9:
+; AVX512F-NEXT: .cfi_offset %r13, -40
+; AVX512F-NEXT: Ltmp10:
+; AVX512F-NEXT: .cfi_offset %r14, -32
+; AVX512F-NEXT: Ltmp11:
+; AVX512F-NEXT: .cfi_offset %r15, -24
+; AVX512F-NEXT: Ltmp12:
+; AVX512F-NEXT: .cfi_offset %rbp, -16
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kshiftlw $15, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_2
+; AVX512F-NEXT: ## BB#1: ## %cond.load
+; AVX512F-NEXT: movzbl (%rdi), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm0
+; AVX512F-NEXT: LBB52_2: ## %else
+; AVX512F-NEXT: kshiftlw $14, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_4
+; AVX512F-NEXT: ## BB#3: ## %cond.load1
+; AVX512F-NEXT: vpinsrb $1, 1(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_4: ## %else2
+; AVX512F-NEXT: kshiftlw $13, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_6
+; AVX512F-NEXT: ## BB#5: ## %cond.load4
+; AVX512F-NEXT: vpinsrb $2, 2(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_6: ## %else5
+; AVX512F-NEXT: kshiftlw $12, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_8
+; AVX512F-NEXT: ## BB#7: ## %cond.load7
+; AVX512F-NEXT: vpinsrb $3, 3(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_8: ## %else8
+; AVX512F-NEXT: kshiftlw $11, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_10
+; AVX512F-NEXT: ## BB#9: ## %cond.load10
+; AVX512F-NEXT: vpinsrb $4, 4(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_10: ## %else11
+; AVX512F-NEXT: kshiftlw $10, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_12
+; AVX512F-NEXT: ## BB#11: ## %cond.load13
+; AVX512F-NEXT: vpinsrb $5, 5(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_12: ## %else14
+; AVX512F-NEXT: kshiftlw $9, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_14
+; AVX512F-NEXT: ## BB#13: ## %cond.load16
+; AVX512F-NEXT: vpinsrb $6, 6(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_14: ## %else17
+; AVX512F-NEXT: kshiftlw $8, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_16
+; AVX512F-NEXT: ## BB#15: ## %cond.load19
+; AVX512F-NEXT: vpinsrb $7, 7(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_16: ## %else20
+; AVX512F-NEXT: kshiftlw $7, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_18
+; AVX512F-NEXT: ## BB#17: ## %cond.load22
+; AVX512F-NEXT: vpinsrb $8, 8(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_18: ## %else23
+; AVX512F-NEXT: kshiftlw $6, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, (%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_20
+; AVX512F-NEXT: ## BB#19: ## %cond.load25
+; AVX512F-NEXT: vpinsrb $9, 9(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_20: ## %else26
+; AVX512F-NEXT: kshiftlw $5, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_22
+; AVX512F-NEXT: ## BB#21: ## %cond.load28
+; AVX512F-NEXT: vpinsrb $10, 10(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_22: ## %else29
+; AVX512F-NEXT: kshiftlw $4, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_24
+; AVX512F-NEXT: ## BB#23: ## %cond.load31
+; AVX512F-NEXT: vpinsrb $11, 11(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_24: ## %else32
+; AVX512F-NEXT: kshiftlw $3, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_26
+; AVX512F-NEXT: ## BB#25: ## %cond.load34
+; AVX512F-NEXT: vpinsrb $12, 12(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_26: ## %else35
+; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT: kshiftlw $2, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_28
+; AVX512F-NEXT: ## BB#27: ## %cond.load37
+; AVX512F-NEXT: vpinsrb $13, 13(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_28: ## %else38
+; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT: kshiftlw $1, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_30
+; AVX512F-NEXT: ## BB#29: ## %cond.load40
+; AVX512F-NEXT: vpinsrb $14, 14(%rdi), %xmm0, %xmm6
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_30: ## %else41
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512F-NEXT: kshiftlw $0, %k0, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_32
+; AVX512F-NEXT: ## BB#31: ## %cond.load43
+; AVX512F-NEXT: vpinsrb $15, 15(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_32: ## %else44
+; AVX512F-NEXT: kshiftlw $15, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_34
+; AVX512F-NEXT: ## BB#33: ## %cond.load46
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $0, 16(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_34: ## %else47
+; AVX512F-NEXT: kshiftlw $14, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_36
+; AVX512F-NEXT: ## BB#35: ## %cond.load49
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $1, 17(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_36: ## %else50
+; AVX512F-NEXT: kshiftlw $13, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_38
+; AVX512F-NEXT: ## BB#37: ## %cond.load52
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $2, 18(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_38: ## %else53
+; AVX512F-NEXT: kshiftlw $12, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_40
+; AVX512F-NEXT: ## BB#39: ## %cond.load55
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $3, 19(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_40: ## %else56
+; AVX512F-NEXT: kshiftlw $11, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_42
+; AVX512F-NEXT: ## BB#41: ## %cond.load58
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $4, 20(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_42: ## %else59
+; AVX512F-NEXT: kshiftlw $10, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_44
+; AVX512F-NEXT: ## BB#43: ## %cond.load61
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $5, 21(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_44: ## %else62
+; AVX512F-NEXT: kshiftlw $9, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_46
+; AVX512F-NEXT: ## BB#45: ## %cond.load64
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $6, 22(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_46: ## %else65
+; AVX512F-NEXT: kshiftlw $8, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_48
+; AVX512F-NEXT: ## BB#47: ## %cond.load67
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $7, 23(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_48: ## %else68
+; AVX512F-NEXT: kshiftlw $7, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_50
+; AVX512F-NEXT: ## BB#49: ## %cond.load70
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $8, 24(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_50: ## %else71
+; AVX512F-NEXT: kshiftlw $6, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_52
+; AVX512F-NEXT: ## BB#51: ## %cond.load73
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $9, 25(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_52: ## %else74
+; AVX512F-NEXT: kshiftlw $5, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_54
+; AVX512F-NEXT: ## BB#53: ## %cond.load76
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $10, 26(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_54: ## %else77
+; AVX512F-NEXT: kshiftlw $4, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_56
+; AVX512F-NEXT: ## BB#55: ## %cond.load79
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $11, 27(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_56: ## %else80
+; AVX512F-NEXT: kshiftlw $3, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_58
+; AVX512F-NEXT: ## BB#57: ## %cond.load82
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $12, 28(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_58: ## %else83
+; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm1
+; AVX512F-NEXT: kshiftlw $2, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_60
+; AVX512F-NEXT: ## BB#59: ## %cond.load85
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT: vpinsrb $13, 29(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_60: ## %else86
+; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
+; AVX512F-NEXT: kshiftlw $1, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_62
+; AVX512F-NEXT: ## BB#61: ## %cond.load88
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512F-NEXT: vpinsrb $14, 30(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: LBB52_62: ## %else89
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
+; AVX512F-NEXT: kshiftlw $0, %k1, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_64
+; AVX512F-NEXT: ## BB#63: ## %cond.load91
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrb $15, 31(%rdi), %xmm1, %xmm1
%ymm0, %ymm0
+; AVX512F-NEXT: LBB52_64: ## %else92
+; AVX512F-NEXT: kshiftlw $15, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_66
+; AVX512F-NEXT: ## BB#65: ## %cond.load94
+; AVX512F-NEXT: vpinsrb $0, 32(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB52_66: ## %else95
+; AVX512F-NEXT: kshiftlw $14, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_68
+; AVX512F-NEXT: ## BB#67: ## %cond.load97
+; AVX512F-NEXT: vpinsrb $1, 33(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_68: ## %else98
+; AVX512F-NEXT: kshiftlw $13, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_70
+; AVX512F-NEXT: ## BB#69: ## %cond.load100
+; AVX512F-NEXT: vpinsrb $2, 34(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_70: ## %else101
+; AVX512F-NEXT: kshiftlw $12, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_72
+; AVX512F-NEXT: ## BB#71: ## %cond.load103
+; AVX512F-NEXT: vpinsrb $3, 35(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_72: ## %else104
+; AVX512F-NEXT: kshiftlw $11, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_74
+; AVX512F-NEXT: ## BB#73: ## %cond.load106
+; AVX512F-NEXT: vpinsrb $4, 36(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_74: ## %else107
+; AVX512F-NEXT: kshiftlw $10, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_76
+; AVX512F-NEXT: ## BB#75: ## %cond.load109
+; AVX512F-NEXT: vpinsrb $5, 37(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_76: ## %else110
+; AVX512F-NEXT: kshiftlw $9, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_78
+; AVX512F-NEXT: ## BB#77: ## %cond.load112
+; AVX512F-NEXT: vpinsrb $6, 38(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_78: ## %else113
+; AVX512F-NEXT: kshiftlw $8, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_80
+; AVX512F-NEXT: ## BB#79: ## %cond.load115
+; AVX512F-NEXT: vpinsrb $7, 39(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 =
ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_80: ## %else116
+; AVX512F-NEXT: kshiftlw $7, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_82
+; AVX512F-NEXT: ## BB#81: ## %cond.load118
+; AVX512F-NEXT: vpinsrb $8, 40(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_82: ## %else119
+; AVX512F-NEXT: kshiftlw $6, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_84
+; AVX512F-NEXT: ## BB#83: ## %cond.load121
+; AVX512F-NEXT: vpinsrb $9, 41(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_84: ## %else122
+; AVX512F-NEXT: kshiftlw $5, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_86
+; AVX512F-NEXT: ## BB#85: ## %cond.load124
+; AVX512F-NEXT: vpinsrb $10, 42(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_86: ## %else125
+; AVX512F-NEXT: kshiftlw $4, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_88
+; AVX512F-NEXT: ## BB#87: ## %cond.load127
+; AVX512F-NEXT: vpinsrb $11, 43(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_88: ## %else128
+; AVX512F-NEXT: kshiftlw $3, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_90
+; AVX512F-NEXT: ## BB#89: ## %cond.load130
+; AVX512F-NEXT: vpinsrb $12, 44(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_90: ## %else131
+; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm2
+; AVX512F-NEXT: kshiftlw $2, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_92
+; AVX512F-NEXT: ## BB#91: ## %cond.load133
+; AVX512F-NEXT: vpinsrb $13, 45(%rdi), %xmm1, %xmm3
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_92: ## %else134
+; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
+; AVX512F-NEXT: kshiftlw $1, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_94
+; AVX512F-NEXT: ## BB#93: ## %cond.load136
+; AVX512F-NEXT: vpinsrb $14, 46(%rdi), %xmm1, %xmm3
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_94: ## %else137
+; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k1
+; AVX512F-NEXT: kshiftlw $0, %k0, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je
LBB52_96
+; AVX512F-NEXT: ## BB#95: ## %cond.load139
+; AVX512F-NEXT: vpinsrb $15, 47(%rdi), %xmm1, %xmm2
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT: LBB52_96: ## %else140
+; AVX512F-NEXT: kshiftlw $15, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_98
+; AVX512F-NEXT: ## BB#97: ## %cond.load142
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $0, 48(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_98: ## %else143
+; AVX512F-NEXT: kshiftlw $14, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_100
+; AVX512F-NEXT: ## BB#99: ## %cond.load145
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $1, 49(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_100: ## %else146
+; AVX512F-NEXT: kshiftlw $13, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_102
+; AVX512F-NEXT: ## BB#101: ## %cond.load148
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $2, 50(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_102: ## %else149
+; AVX512F-NEXT: kshiftlw $12, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_104
+; AVX512F-NEXT: ## BB#103: ## %cond.load151
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $3, 51(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_104: ## %else152
+; AVX512F-NEXT: kshiftlw $11, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_106
+; AVX512F-NEXT: ## BB#105: ## %cond.load154
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $4, 52(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_106: ## %else155
+; AVX512F-NEXT: kshiftlw $10, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_108
+; AVX512F-NEXT: ## BB#107: ## %cond.load157
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $5, 53(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_108: ## %else158
+; AVX512F-NEXT: kshiftlw $9, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_110
+; AVX512F-NEXT: ## BB#109: ## %cond.load160
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $6, 54(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT:
LBB52_110: ## %else161
+; AVX512F-NEXT: kshiftlw $8, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_112
+; AVX512F-NEXT: ## BB#111: ## %cond.load163
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $7, 55(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_112: ## %else164
+; AVX512F-NEXT: kshiftlw $7, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Folded Spill
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_114
+; AVX512F-NEXT: ## BB#113: ## %cond.load166
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $8, 56(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_114: ## %else167
+; AVX512F-NEXT: kshiftlw $6, %k1, %k2
+; AVX512F-NEXT: kshiftrw $15, %k2, %k2
+; AVX512F-NEXT: kmovw %k2, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_116
+; AVX512F-NEXT: ## BB#115: ## %cond.load169
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $9, 57(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_116: ## %else170
+; AVX512F-NEXT: kshiftlw $5, %k1, %k3
+; AVX512F-NEXT: kshiftrw $15, %k3, %k3
+; AVX512F-NEXT: kmovw %k3, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_118
+; AVX512F-NEXT: ## BB#117: ## %cond.load172
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $10, 58(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_118: ## %else173
+; AVX512F-NEXT: kshiftlw $4, %k1, %k4
+; AVX512F-NEXT: kshiftrw $15, %k4, %k4
+; AVX512F-NEXT: kmovw %k4, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_120
+; AVX512F-NEXT: ## BB#119: ## %cond.load175
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $11, 59(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_120: ## %else176
+; AVX512F-NEXT: kshiftlw $3, %k1, %k5
+; AVX512F-NEXT: kshiftrw $15, %k5, %k5
+; AVX512F-NEXT: kmovw %k5, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_122
+; AVX512F-NEXT: ## BB#121: ## %cond.load178
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $12, 60(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_122: ## %else179
+; AVX512F-NEXT: kshiftlw $2, %k1, %k6
+; AVX512F-NEXT: kshiftrw $15, %k6, %k6
+; AVX512F-NEXT: kmovw %k6, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_124
+; AVX512F-NEXT: ## BB#123: ## %cond.load181
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $13, 61(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_124: ## %else182
+; AVX512F-NEXT: kshiftlw $1, %k1, %k7
+; AVX512F-NEXT: kshiftrw $15, %k7, %k7
+; AVX512F-NEXT: kmovw %k7, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_126
+; AVX512F-NEXT: ## BB#125: ## %cond.load184
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $14, 62(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_126: ## %else185
+; AVX512F-NEXT: kshiftlw $0, %k1, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT:
kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB52_128
+; AVX512F-NEXT: ## BB#127: ## %cond.load187
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpinsrb $15, 63(%rdi), %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: LBB52_128: ## %else188
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+;
AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw (%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, (%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw %k2, %eax
+; AVX512F-NEXT: movl %eax,
{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw %k3, %r12d
+; AVX512F-NEXT: kmovw %k4, %r15d
+; AVX512F-NEXT: kmovw %k5, %r14d
+; AVX512F-NEXT: kmovw %k6, %ebx
+; AVX512F-NEXT: kmovw %k7, %r11d
+; AVX512F-NEXT: kmovw %k1, %r10d
+; AVX512F-NEXT: kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r8d
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r9d
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %edi
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %esi
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %edx
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %ecx
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: movl -{{[0-9]+}}(%rsp), %r13d ## 4-byte Reload
+; AVX512F-NEXT: vmovd %r13d, %xmm2
+; AVX512F-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $9, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $12, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm2, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
+; AVX512F-NEXT: vmovd %ebp, %xmm3
+; AVX512F-NEXT: vpinsrb $1, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $3, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $9, (%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $10, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $11, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $12,
-{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $13, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $14, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $15, -{{[0-9]+}}(%rsp), %xmm3, %xmm3 ## 4-byte Folded Reload
+; AVX512F-NEXT: movl -{{[0-9]+}}(%rsp), %ebp ## 4-byte Reload
+; AVX512F-NEXT: vmovd %ebp, %xmm6
+; AVX512F-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $2, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $3, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $4, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $5, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $6, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $7, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $8, -{{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $9, {{[0-9]+}}(%rsp), %xmm6, %xmm6 ## 4-byte Folded Reload
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r13d
+; AVX512F-NEXT: vpinsrb $10, %r12d, %xmm6, %xmm6
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r12d
+; AVX512F-NEXT: vpinsrb $11, %r15d, %xmm6, %xmm6
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r15d
+; AVX512F-NEXT: vpinsrb $12, %r14d, %xmm6, %xmm6
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r14d
+; AVX512F-NEXT: vpinsrb $13, %ebx, %xmm6, %xmm6
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %ebx
+; AVX512F-NEXT: vpinsrb $14, %r11d, %xmm6, %xmm6
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r11d
+; AVX512F-NEXT: vpinsrb $15, %r10d, %xmm6, %xmm6
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r10d
+; AVX512F-NEXT: vmovd %r8d, %xmm7
+; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 ## 2-byte Folded Reload
+; AVX512F-NEXT: kmovw %k0, %r8d
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpsllw $7, %ymm2, %ymm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm4, %ymm0
+; AVX512F-NEXT: vpinsrb $1, -{{[0-9]+}}(%rsp), %xmm7, %xmm2 ## 4-byte Folded Reload
+; AVX512F-NEXT: vpinsrb $2, %r9d, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $3, %edi, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $4, %esi, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $5, %edx, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $6, %ecx, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $8, %r13d, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $9, %r12d, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $10, %r15d, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $11, %r14d, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $12, %ebx, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $13, %r11d, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $14, %r10d, %xmm2, %xmm2
+; AVX512F-NEXT: vpinsrb $15, %r8d, %xmm2, %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm2, %ymm2
+; AVX512F-NEXT: vpsllw $7, %ymm2,
%ymm2
+; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm1
+; AVX512F-NEXT: addq $76, %rsp
+; AVX512F-NEXT: popq %rbx
+; AVX512F-NEXT: popq %r12
+; AVX512F-NEXT: popq %r13
+; AVX512F-NEXT: popq %r14
+; AVX512F-NEXT: popq %r15
+; AVX512F-NEXT: popq %rbp
+; AVX512F-NEXT: retq
+;
; SKX-LABEL: test_mask_load_64xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
@@ -2258,6 +5545,145 @@ define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x
declare <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>*, i32, <64 x i1>, <64 x i8>)
define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
+; AVX-LABEL: test_mask_load_8xi16:
+; AVX: ## BB#0:
+; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: ## implicit-def: %XMM1
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB53_2
+; AVX-NEXT: ## BB#1: ## %cond.load
+; AVX-NEXT: movzwl (%rdi), %eax
+; AVX-NEXT: vmovd %eax, %xmm1
+; AVX-NEXT: LBB53_2: ## %else
+; AVX-NEXT: vpextrb $2, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB53_4
+; AVX-NEXT: ## BB#3: ## %cond.load1
+; AVX-NEXT: vpinsrw $1, 2(%rdi), %xmm1, %xmm1
+; AVX-NEXT: LBB53_4: ## %else2
+; AVX-NEXT: vpextrb $4, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB53_6
+; AVX-NEXT: ## BB#5: ## %cond.load4
+; AVX-NEXT: vpinsrw $2, 4(%rdi), %xmm1, %xmm1
+; AVX-NEXT: LBB53_6: ## %else5
+; AVX-NEXT: vpextrb $6, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB53_8
+; AVX-NEXT: ## BB#7: ## %cond.load7
+; AVX-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm1
+; AVX-NEXT: LBB53_8: ## %else8
+; AVX-NEXT: vpextrb $8, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB53_10
+; AVX-NEXT: ## BB#9: ## %cond.load10
+; AVX-NEXT: vpinsrw $4, 8(%rdi), %xmm1, %xmm1
+; AVX-NEXT: LBB53_10: ## %else11
+; AVX-NEXT: vpextrb $10, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB53_12
+; AVX-NEXT: ## BB#11: ## %cond.load13
+; AVX-NEXT: vpinsrw $5, 10(%rdi), %xmm1, %xmm1
+; AVX-NEXT: LBB53_12: ## %else14
+; AVX-NEXT: vpextrb $12, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB53_14
+; AVX-NEXT: ## BB#13: ## %cond.load16
+; AVX-NEXT: vpinsrw $6, 12(%rdi), %xmm1, %xmm1
+; AVX-NEXT: LBB53_14: ## %else17
+; AVX-NEXT: vpextrb $14, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB53_16
+; AVX-NEXT: ## BB#15: ## %cond.load19
+; AVX-NEXT: vpinsrw $7, 14(%rdi), %xmm1, %xmm1
+; AVX-NEXT: LBB53_16: ## %else20
+; AVX-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: test_mask_load_8xi16:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT: kshiftlw $15, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: ## implicit-def: %XMM0
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB53_2
+; AVX512F-NEXT: ## BB#1: ## %cond.load
+; AVX512F-NEXT: movzwl (%rdi), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm0
+; AVX512F-NEXT: LBB53_2: ## %else
+; AVX512F-NEXT: kshiftlw $14, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB53_4
+; AVX512F-NEXT: ## BB#3: ## %cond.load1
+; AVX512F-NEXT: vpinsrw $1, 2(%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: LBB53_4: ##
%else2
+; AVX512F-NEXT: kshiftlw $13, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB53_6
+; AVX512F-NEXT: ## BB#5: ## %cond.load4
+; AVX512F-NEXT: vpinsrw $2, 4(%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: LBB53_6: ## %else5
+; AVX512F-NEXT: kshiftlw $12, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB53_8
+; AVX512F-NEXT: ## BB#7: ## %cond.load7
+; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: LBB53_8: ## %else8
+; AVX512F-NEXT: kshiftlw $11, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB53_10
+; AVX512F-NEXT: ## BB#9: ## %cond.load10
+; AVX512F-NEXT: vpinsrw $4, 8(%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: LBB53_10: ## %else11
+; AVX512F-NEXT: kshiftlw $10, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB53_12
+; AVX512F-NEXT: ## BB#11: ## %cond.load13
+; AVX512F-NEXT: vpinsrw $5, 10(%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: LBB53_12: ## %else14
+; AVX512F-NEXT: kshiftlw $9, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB53_14
+; AVX512F-NEXT: ## BB#13: ## %cond.load16
+; AVX512F-NEXT: vpinsrw $6, 12(%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: LBB53_14: ## %else17
+; AVX512F-NEXT: kshiftlw $8, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB53_16
+; AVX512F-NEXT: ## BB#15: ## %cond.load19
+; AVX512F-NEXT: vpinsrw $7, 14(%rdi), %xmm0, %xmm0
+; AVX512F-NEXT: LBB53_16: ## %else20
+; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovqw %zmm1, %xmm1
+; AVX512F-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpxor %xmm2, %xmm1, %xmm2
+; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
; SKX-LABEL: test_mask_load_8xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
@@ -2270,6 +5696,431 @@ define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i1
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
+; AVX1-LABEL: test_mask_load_16xi16:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: ## implicit-def: %YMM1
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_2
+; AVX1-NEXT: ## BB#1: ## %cond.load
+; AVX1-NEXT: movzwl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm1
+; AVX1-NEXT: LBB54_2: ## %else
+; AVX1-NEXT: vpextrb $1, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_4
+; AVX1-NEXT: ## BB#3: ## %cond.load1
+; AVX1-NEXT: vpinsrw $1, 2(%rdi), %xmm1, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: LBB54_4: ## %else2
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_6
+; AVX1-NEXT: ## BB#5: ## %cond.load4
+; AVX1-NEXT: vpinsrw $2, 4(%rdi), %xmm1, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: LBB54_6: ## %else5
+; AVX1-NEXT: vpextrb $3, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_8
+; AVX1-NEXT: ## BB#7: ## %cond.load7
+; AVX1-NEXT: vpinsrw $3, 6(%rdi), %xmm1,
%xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: LBB54_8: ## %else8
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_10
+; AVX1-NEXT: ## BB#9: ## %cond.load10
+; AVX1-NEXT: vpinsrw $4, 8(%rdi), %xmm1, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: LBB54_10: ## %else11
+; AVX1-NEXT: vpextrb $5, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_12
+; AVX1-NEXT: ## BB#11: ## %cond.load13
+; AVX1-NEXT: vpinsrw $5, 10(%rdi), %xmm1, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: LBB54_12: ## %else14
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_14
+; AVX1-NEXT: ## BB#13: ## %cond.load16
+; AVX1-NEXT: vpinsrw $6, 12(%rdi), %xmm1, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: LBB54_14: ## %else17
+; AVX1-NEXT: vpextrb $7, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_16
+; AVX1-NEXT: ## BB#15: ## %cond.load19
+; AVX1-NEXT: vpinsrw $7, 14(%rdi), %xmm1, %xmm2
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT: LBB54_16: ## %else20
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_18
+; AVX1-NEXT: ## BB#17: ## %cond.load22
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpinsrw $0, 16(%rdi), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: LBB54_18: ## %else23
+; AVX1-NEXT: vpextrb $9, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_20
+; AVX1-NEXT: ## BB#19: ## %cond.load25
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpinsrw $1, 18(%rdi), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: LBB54_20: ## %else26
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_22
+; AVX1-NEXT: ## BB#21: ## %cond.load28
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpinsrw $2, 20(%rdi), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: LBB54_22: ## %else29
+; AVX1-NEXT: vpextrb $11, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_24
+; AVX1-NEXT: ## BB#23: ## %cond.load31
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpinsrw $3, 22(%rdi), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: LBB54_24: ## %else32
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_26
+; AVX1-NEXT: ## BB#25: ## %cond.load34
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpinsrw $4, 24(%rdi), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: LBB54_26: ## %else35
+; AVX1-NEXT: vpextrb $13, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_28
+; AVX1-NEXT: ## BB#27: ## %cond.load37
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpinsrw $5, 26(%rdi), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: LBB54_28: ## %else38
+; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_30
+; AVX1-NEXT: ## BB#29: ## %cond.load40
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpinsrw $6, 28(%rdi), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: LBB54_30: ## %else41
+; AVX1-NEXT: vpextrb $15, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB54_32
+; AVX1-NEXT: ## BB#31: ## %cond.load43
+;
AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpinsrw $7, 30(%rdi), %xmm2, %xmm2
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: LBB54_32: ## %else44
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
+; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_mask_load_16xi16:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: ## implicit-def: %YMM1
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_2
+; AVX2-NEXT: ## BB#1: ## %cond.load
+; AVX2-NEXT: movzwl (%rdi), %eax
+; AVX2-NEXT: vmovd %eax, %xmm1
+; AVX2-NEXT: LBB54_2: ## %else
+; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_4
+; AVX2-NEXT: ## BB#3: ## %cond.load1
+; AVX2-NEXT: vpinsrw $1, 2(%rdi), %xmm1, %xmm2
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: LBB54_4: ## %else2
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_6
+; AVX2-NEXT: ## BB#5: ## %cond.load4
+; AVX2-NEXT: vpinsrw $2, 4(%rdi), %xmm1, %xmm2
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: LBB54_6: ## %else5
+; AVX2-NEXT: vpextrb $3, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_8
+; AVX2-NEXT: ## BB#7: ## %cond.load7
+; AVX2-NEXT: vpinsrw $3, 6(%rdi), %xmm1, %xmm2
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: LBB54_8: ## %else8
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_10
+; AVX2-NEXT: ## BB#9: ## %cond.load10
+; AVX2-NEXT: vpinsrw $4, 8(%rdi), %xmm1, %xmm2
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: LBB54_10: ## %else11
+; AVX2-NEXT: vpextrb $5, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_12
+; AVX2-NEXT: ## BB#11: ## %cond.load13
+; AVX2-NEXT: vpinsrw $5, 10(%rdi), %xmm1, %xmm2
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: LBB54_12: ## %else14
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_14
+; AVX2-NEXT: ## BB#13: ## %cond.load16
+; AVX2-NEXT: vpinsrw $6, 12(%rdi), %xmm1, %xmm2
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: LBB54_14: ## %else17
+; AVX2-NEXT: vpextrb $7, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_16
+; AVX2-NEXT: ## BB#15: ## %cond.load19
+; AVX2-NEXT: vpinsrw $7, 14(%rdi), %xmm1, %xmm2
+; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT: LBB54_16: ## %else20
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_18
+; AVX2-NEXT: ## BB#17: ## %cond.load22
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpinsrw $0, 16(%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: LBB54_18: ## %else23
+; AVX2-NEXT: vpextrb $9, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_20
+; AVX2-NEXT: ## BB#19: ## %cond.load25
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpinsrw $1, 18(%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+;
AVX2-NEXT: LBB54_20: ## %else26
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_22
+; AVX2-NEXT: ## BB#21: ## %cond.load28
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpinsrw $2, 20(%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: LBB54_22: ## %else29
+; AVX2-NEXT: vpextrb $11, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_24
+; AVX2-NEXT: ## BB#23: ## %cond.load31
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpinsrw $3, 22(%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: LBB54_24: ## %else32
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_26
+; AVX2-NEXT: ## BB#25: ## %cond.load34
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpinsrw $4, 24(%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: LBB54_26: ## %else35
+; AVX2-NEXT: vpextrb $13, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_28
+; AVX2-NEXT: ## BB#27: ## %cond.load37
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpinsrw $5, 26(%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: LBB54_28: ## %else38
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_30
+; AVX2-NEXT: ## BB#29: ## %cond.load40
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpinsrw $6, 28(%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: LBB54_30: ## %else41
+; AVX2-NEXT: vpextrb $15, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB54_32
+; AVX2-NEXT: ## BB#31: ## %cond.load43
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpinsrw $7, 30(%rdi), %xmm2, %xmm2
+; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX2-NEXT: LBB54_32: ## %else44
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
+; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_mask_load_16xi16:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: kshiftlw $15, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: ## implicit-def: %YMM0
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_2
+; AVX512F-NEXT: ## BB#1: ## %cond.load
+; AVX512F-NEXT: movzwl (%rdi), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm0
+; AVX512F-NEXT: LBB54_2: ## %else
+; AVX512F-NEXT: kshiftlw $14, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_4
+; AVX512F-NEXT: ## BB#3: ## %cond.load1
+; AVX512F-NEXT: vpinsrw $1, 2(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB54_4: ## %else2
+; AVX512F-NEXT: kshiftlw $13, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_6
+; AVX512F-NEXT: ## BB#5: ## %cond.load4
+; AVX512F-NEXT: vpinsrw $2, 4(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+;
AVX512F-NEXT: LBB54_6: ## %else5
+; AVX512F-NEXT: kshiftlw $12, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_8
+; AVX512F-NEXT: ## BB#7: ## %cond.load7
+; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB54_8: ## %else8
+; AVX512F-NEXT: kshiftlw $11, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_10
+; AVX512F-NEXT: ## BB#9: ## %cond.load10
+; AVX512F-NEXT: vpinsrw $4, 8(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB54_10: ## %else11
+; AVX512F-NEXT: kshiftlw $10, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_12
+; AVX512F-NEXT: ## BB#11: ## %cond.load13
+; AVX512F-NEXT: vpinsrw $5, 10(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB54_12: ## %else14
+; AVX512F-NEXT: kshiftlw $9, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_14
+; AVX512F-NEXT: ## BB#13: ## %cond.load16
+; AVX512F-NEXT: vpinsrw $6, 12(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB54_14: ## %else17
+; AVX512F-NEXT: kshiftlw $8, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_16
+; AVX512F-NEXT: ## BB#15: ## %cond.load19
+; AVX512F-NEXT: vpinsrw $7, 14(%rdi), %xmm0, %xmm1
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX512F-NEXT: LBB54_16: ## %else20
+; AVX512F-NEXT: kshiftlw $7, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_18
+; AVX512F-NEXT: ## BB#17: ## %cond.load22
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrw $0, 16(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB54_18: ## %else23
+; AVX512F-NEXT: kshiftlw $6, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_20
+; AVX512F-NEXT: ## BB#19: ## %cond.load25
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrw $1, 18(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB54_20: ## %else26
+; AVX512F-NEXT: kshiftlw $5, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_22
+; AVX512F-NEXT: ## BB#21: ## %cond.load28
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrw $2, 20(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB54_22: ## %else29
+; AVX512F-NEXT: kshiftlw $4, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_24
+; AVX512F-NEXT: ## BB#23: ## %cond.load31
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrw $3, 22(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB54_24: ## %else32
+; AVX512F-NEXT:
kshiftlw $3, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_26
+; AVX512F-NEXT: ## BB#25: ## %cond.load34
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrw $4, 24(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB54_26: ## %else35
+; AVX512F-NEXT: kshiftlw $2, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_28
+; AVX512F-NEXT: ## BB#27: ## %cond.load37
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrw $5, 26(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB54_28: ## %else38
+; AVX512F-NEXT: kshiftlw $1, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_30
+; AVX512F-NEXT: ## BB#29: ## %cond.load40
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrw $6, 28(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB54_30: ## %else41
+; AVX512F-NEXT: kshiftlw $0, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB54_32
+; AVX512F-NEXT: ## BB#31: ## %cond.load43
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm1, %xmm1
+; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512F-NEXT: LBB54_32: ## %else44
+; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: retq
+;
; SKX-LABEL: test_mask_load_16xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
@@ -2282,6 +6133,777 @@ define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16
declare <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>*, i32, <16 x i1>, <16 x i16>)
define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
+; AVX1-LABEL: test_mask_load_32xi16:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_2
+; AVX1-NEXT: ## BB#1: ## %cond.load
+; AVX1-NEXT: movzwl (%rdi), %eax
+; AVX1-NEXT: vmovd %eax, %xmm3
+; AVX1-NEXT: LBB55_2: ## %else
+; AVX1-NEXT: vpextrb $1, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_4
+; AVX1-NEXT: ## BB#3: ## %cond.load1
+; AVX1-NEXT: vpinsrw $1, 2(%rdi), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB55_4: ## %else2
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_6
+; AVX1-NEXT: ## BB#5: ## %cond.load4
+; AVX1-NEXT: vpinsrw $2, 4(%rdi), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB55_6: ## %else5
+; AVX1-NEXT: vpextrb $3, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_8
+; AVX1-NEXT: ## BB#7: ## %cond.load7
+; AVX1-NEXT: vpinsrw $3, 6(%rdi), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB55_8: ## %else8
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_10
+; AVX1-NEXT: ## BB#9: ## %cond.load10
+; AVX1-NEXT: vpinsrw $4, 8(%rdi), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB55_10: ## %else11
+;
AVX1-NEXT: vpextrb $5, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_12
+; AVX1-NEXT: ## BB#11: ## %cond.load13
+; AVX1-NEXT: vpinsrw $5, 10(%rdi), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB55_12: ## %else14
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_14
+; AVX1-NEXT: ## BB#13: ## %cond.load16
+; AVX1-NEXT: vpinsrw $6, 12(%rdi), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB55_14: ## %else17
+; AVX1-NEXT: vpextrb $7, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_16
+; AVX1-NEXT: ## BB#15: ## %cond.load19
+; AVX1-NEXT: vpinsrw $7, 14(%rdi), %xmm3, %xmm4
+; AVX1-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX1-NEXT: LBB55_16: ## %else20
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_18
+; AVX1-NEXT: ## BB#17: ## %cond.load22
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrw $0, 16(%rdi), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB55_18: ## %else23
+; AVX1-NEXT: vpextrb $9, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_20
+; AVX1-NEXT: ## BB#19: ## %cond.load25
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrw $1, 18(%rdi), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB55_20: ## %else26
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_22
+; AVX1-NEXT: ## BB#21: ## %cond.load28
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrw $2, 20(%rdi), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB55_22: ## %else29
+; AVX1-NEXT: vpextrb $11, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_24
+; AVX1-NEXT: ## BB#23: ## %cond.load31
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrw $3, 22(%rdi), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB55_24: ## %else32
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_26
+; AVX1-NEXT: ## BB#25: ## %cond.load34
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrw $4, 24(%rdi), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB55_26: ## %else35
+; AVX1-NEXT: vpextrb $13, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_28
+; AVX1-NEXT: ## BB#27: ## %cond.load37
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrw $5, 26(%rdi), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB55_28: ## %else38
+; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_30
+; AVX1-NEXT: ## BB#29: ## %cond.load40
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrw $6, 28(%rdi), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB55_30: ## %else41
+; AVX1-NEXT: vpextrb $15, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_32
+; AVX1-NEXT: ## BB#31: ## %cond.load43
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpinsrw $7, 30(%rdi), %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
+; AVX1-NEXT: LBB55_32: ## %else44
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT: vpextrb $0, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_34
+; AVX1-NEXT: ## BB#33: ## %cond.load46
+; AVX1-NEXT: vpinsrw $0, 32(%rdi), %xmm0,
%xmm5
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7]
+; AVX1-NEXT: LBB55_34: ## %else47
+; AVX1-NEXT: vpextrb $1, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_36
+; AVX1-NEXT: ## BB#35: ## %cond.load49
+; AVX1-NEXT: vpinsrw $1, 34(%rdi), %xmm5, %xmm6
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX1-NEXT: LBB55_36: ## %else50
+; AVX1-NEXT: vpextrb $2, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_38
+; AVX1-NEXT: ## BB#37: ## %cond.load52
+; AVX1-NEXT: vpinsrw $2, 36(%rdi), %xmm5, %xmm6
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX1-NEXT: LBB55_38: ## %else53
+; AVX1-NEXT: vpextrb $3, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_40
+; AVX1-NEXT: ## BB#39: ## %cond.load55
+; AVX1-NEXT: vpinsrw $3, 38(%rdi), %xmm5, %xmm6
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX1-NEXT: LBB55_40: ## %else56
+; AVX1-NEXT: vpextrb $4, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_42
+; AVX1-NEXT: ## BB#41: ## %cond.load58
+; AVX1-NEXT: vpinsrw $4, 40(%rdi), %xmm5, %xmm6
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX1-NEXT: LBB55_42: ## %else59
+; AVX1-NEXT: vpextrb $5, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_44
+; AVX1-NEXT: ## BB#43: ## %cond.load61
+; AVX1-NEXT: vpinsrw $5, 42(%rdi), %xmm5, %xmm6
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX1-NEXT: LBB55_44: ## %else62
+; AVX1-NEXT: vpextrb $6, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_46
+; AVX1-NEXT: ## BB#45: ## %cond.load64
+; AVX1-NEXT: vpinsrw $6, 44(%rdi), %xmm5, %xmm6
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX1-NEXT: LBB55_46: ## %else65
+; AVX1-NEXT: vpextrb $7, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_48
+; AVX1-NEXT: ## BB#47: ## %cond.load67
+; AVX1-NEXT: vpinsrw $7, 46(%rdi), %xmm5, %xmm6
+; AVX1-NEXT: vblendps {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
+; AVX1-NEXT: LBB55_48: ## %else68
+; AVX1-NEXT: vpextrb $8, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_50
+; AVX1-NEXT: ## BB#49: ## %cond.load70
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpinsrw $0, 48(%rdi), %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: LBB55_50: ## %else71
+; AVX1-NEXT: vpextrb $9, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_52
+; AVX1-NEXT: ## BB#51: ## %cond.load73
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpinsrw $1, 50(%rdi), %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: LBB55_52: ## %else74
+; AVX1-NEXT: vpextrb $10, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_54
+; AVX1-NEXT: ## BB#53: ## %cond.load76
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpinsrw $2, 52(%rdi), %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: LBB55_54: ## %else77
+; AVX1-NEXT: vpextrb $11, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_56
+; AVX1-NEXT: ## BB#55: ## %cond.load79
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpinsrw $3, 54(%rdi), %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: LBB55_56: ## %else80
+; AVX1-NEXT: vpextrb $12, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_58
+; AVX1-NEXT: ## BB#57: ## %cond.load82
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpinsrw $4, 56(%rdi), %xmm6,
%xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: LBB55_58: ## %else83
+; AVX1-NEXT: vpextrb $13, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_60
+; AVX1-NEXT: ## BB#59: ## %cond.load85
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpinsrw $5, 58(%rdi), %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: LBB55_60: ## %else86
+; AVX1-NEXT: vpextrb $14, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_62
+; AVX1-NEXT: ## BB#61: ## %cond.load88
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpinsrw $6, 60(%rdi), %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: LBB55_62: ## %else89
+; AVX1-NEXT: vpextrb $15, %xmm4, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB55_64
+; AVX1-NEXT: ## BB#63: ## %cond.load91
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6
+; AVX1-NEXT: vpinsrw $7, 62(%rdi), %xmm6, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5
+; AVX1-NEXT: LBB55_64: ## %else92
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX1-NEXT: vpsllw $15, %xmm6, %xmm6
+; AVX1-NEXT: vpsraw $15, %xmm6, %xmm6
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
+; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm6, %ymm0
+; AVX1-NEXT: vandnps %ymm1, %ymm0, %ymm1
+; AVX1-NEXT: vandps %ymm0, %ymm3, %ymm0
+; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
+; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
+; AVX1-NEXT: vpsraw $15, %xmm1, %xmm1
+; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT: vpsllw $15, %xmm3, %xmm3
+; AVX1-NEXT: vpsraw $15, %xmm3, %xmm3
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vandnps %ymm2, %ymm1, %ymm2
+; AVX1-NEXT: vandps %ymm1, %ymm5, %ymm1
+; AVX1-NEXT: vorps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_mask_load_32xi16:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB55_2
+; AVX2-NEXT: ## BB#1: ## %cond.load
+; AVX2-NEXT: movzwl (%rdi), %eax
+; AVX2-NEXT: vmovd %eax, %xmm3
+; AVX2-NEXT: LBB55_2: ## %else
+; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB55_4
+; AVX2-NEXT: ## BB#3: ## %cond.load1
+; AVX2-NEXT: vpinsrw $1, 2(%rdi), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB55_4: ## %else2
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB55_6
+; AVX2-NEXT: ## BB#5: ## %cond.load4
+; AVX2-NEXT: vpinsrw $2, 4(%rdi), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB55_6: ## %else5
+; AVX2-NEXT: vpextrb $3, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB55_8
+; AVX2-NEXT: ## BB#7: ## %cond.load7
+; AVX2-NEXT: vpinsrw $3, 6(%rdi), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT: LBB55_8: ## %else8
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB55_10
+; AVX2-NEXT: ## BB#9: ## %cond.load10
+; AVX2-NEXT: vpinsrw $4, 8(%rdi), %xmm3, %xmm4
+; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX2-NEXT:
LBB55_10: ## %else11 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_12 +; AVX2-NEXT: ## BB#11: ## %cond.load13 +; AVX2-NEXT: vpinsrw $5, 10(%rdi), %xmm3, %xmm4 +; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX2-NEXT: LBB55_12: ## %else14 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_14 +; AVX2-NEXT: ## BB#13: ## %cond.load16 +; AVX2-NEXT: vpinsrw $6, 12(%rdi), %xmm3, %xmm4 +; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX2-NEXT: LBB55_14: ## %else17 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_16 +; AVX2-NEXT: ## BB#15: ## %cond.load19 +; AVX2-NEXT: vpinsrw $7, 14(%rdi), %xmm3, %xmm4 +; AVX2-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX2-NEXT: LBB55_16: ## %else20 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_18 +; AVX2-NEXT: ## BB#17: ## %cond.load22 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpinsrw $0, 16(%rdi), %xmm4, %xmm4 +; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX2-NEXT: LBB55_18: ## %else23 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_20 +; AVX2-NEXT: ## BB#19: ## %cond.load25 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpinsrw $1, 18(%rdi), %xmm4, %xmm4 +; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX2-NEXT: LBB55_20: ## %else26 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_22 +; AVX2-NEXT: ## BB#21: ## %cond.load28 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpinsrw $2, 20(%rdi), %xmm4, %xmm4 +; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX2-NEXT: LBB55_22: ## %else29 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_24 +; AVX2-NEXT: ## BB#23: ## %cond.load31 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpinsrw $3, 22(%rdi), %xmm4, %xmm4 +; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX2-NEXT: LBB55_24: ## %else32 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_26 +; AVX2-NEXT: ## BB#25: ## %cond.load34 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpinsrw $4, 24(%rdi), %xmm4, %xmm4 +; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX2-NEXT: LBB55_26: ## %else35 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_28 +; AVX2-NEXT: ## BB#27: ## %cond.load37 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpinsrw $5, 26(%rdi), %xmm4, %xmm4 +; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX2-NEXT: LBB55_28: ## %else38 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_30 +; AVX2-NEXT: ## BB#29: ## %cond.load40 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpinsrw $6, 28(%rdi), %xmm4, %xmm4 +; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX2-NEXT: LBB55_30: ## %else41 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_32 +; AVX2-NEXT: ## BB#31: ## %cond.load43 +; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX2-NEXT: vpinsrw $7, 30(%rdi), %xmm4, %xmm4 +; AVX2-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX2-NEXT: LBB55_32: ## %else44 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 +; AVX2-NEXT: vpextrb $0, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_34 +; AVX2-NEXT: ## BB#33: ## %cond.load46 +; AVX2-NEXT: vpinsrw 
$0, 32(%rdi), %xmm0, %xmm5 +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7] +; AVX2-NEXT: LBB55_34: ## %else47 +; AVX2-NEXT: vpextrb $1, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_36 +; AVX2-NEXT: ## BB#35: ## %cond.load49 +; AVX2-NEXT: vpinsrw $1, 34(%rdi), %xmm5, %xmm6 +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX2-NEXT: LBB55_36: ## %else50 +; AVX2-NEXT: vpextrb $2, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_38 +; AVX2-NEXT: ## BB#37: ## %cond.load52 +; AVX2-NEXT: vpinsrw $2, 36(%rdi), %xmm5, %xmm6 +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX2-NEXT: LBB55_38: ## %else53 +; AVX2-NEXT: vpextrb $3, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_40 +; AVX2-NEXT: ## BB#39: ## %cond.load55 +; AVX2-NEXT: vpinsrw $3, 38(%rdi), %xmm5, %xmm6 +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX2-NEXT: LBB55_40: ## %else56 +; AVX2-NEXT: vpextrb $4, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_42 +; AVX2-NEXT: ## BB#41: ## %cond.load58 +; AVX2-NEXT: vpinsrw $4, 40(%rdi), %xmm5, %xmm6 +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX2-NEXT: LBB55_42: ## %else59 +; AVX2-NEXT: vpextrb $5, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_44 +; AVX2-NEXT: ## BB#43: ## %cond.load61 +; AVX2-NEXT: vpinsrw $5, 42(%rdi), %xmm5, %xmm6 +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX2-NEXT: LBB55_44: ## %else62 +; AVX2-NEXT: vpextrb $6, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_46 +; AVX2-NEXT: ## BB#45: ## %cond.load64 +; AVX2-NEXT: vpinsrw $6, 44(%rdi), %xmm5, %xmm6 +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX2-NEXT: LBB55_46: ## %else65 +; AVX2-NEXT: vpextrb $7, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_48 +; AVX2-NEXT: ## BB#47: ## %cond.load67 +; AVX2-NEXT: vpinsrw $7, 46(%rdi), %xmm5, %xmm6 +; AVX2-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX2-NEXT: LBB55_48: ## %else68 +; AVX2-NEXT: vpextrb $8, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_50 +; AVX2-NEXT: ## BB#49: ## %cond.load70 +; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX2-NEXT: vpinsrw $0, 48(%rdi), %xmm6, %xmm6 +; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 +; AVX2-NEXT: LBB55_50: ## %else71 +; AVX2-NEXT: vpextrb $9, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_52 +; AVX2-NEXT: ## BB#51: ## %cond.load73 +; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX2-NEXT: vpinsrw $1, 50(%rdi), %xmm6, %xmm6 +; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 +; AVX2-NEXT: LBB55_52: ## %else74 +; AVX2-NEXT: vpextrb $10, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_54 +; AVX2-NEXT: ## BB#53: ## %cond.load76 +; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX2-NEXT: vpinsrw $2, 52(%rdi), %xmm6, %xmm6 +; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 +; AVX2-NEXT: LBB55_54: ## %else77 +; AVX2-NEXT: vpextrb $11, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_56 +; AVX2-NEXT: ## BB#55: ## %cond.load79 +; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX2-NEXT: vpinsrw $3, 54(%rdi), %xmm6, %xmm6 +; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 +; AVX2-NEXT: LBB55_56: ## %else80 +; AVX2-NEXT: vpextrb $12, %xmm4, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB55_58 +; AVX2-NEXT: ## BB#57: ## %cond.load82 +; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX2-NEXT: vpinsrw $4, 
56(%rdi), %xmm6, %xmm6
+; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX2-NEXT: LBB55_58: ## %else83
+; AVX2-NEXT: vpextrb $13, %xmm4, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB55_60
+; AVX2-NEXT: ## BB#59: ## %cond.load85
+; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6
+; AVX2-NEXT: vpinsrw $5, 58(%rdi), %xmm6, %xmm6
+; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX2-NEXT: LBB55_60: ## %else86
+; AVX2-NEXT: vpextrb $14, %xmm4, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB55_62
+; AVX2-NEXT: ## BB#61: ## %cond.load88
+; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6
+; AVX2-NEXT: vpinsrw $6, 60(%rdi), %xmm6, %xmm6
+; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX2-NEXT: LBB55_62: ## %else89
+; AVX2-NEXT: vpextrb $15, %xmm4, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB55_64
+; AVX2-NEXT: ## BB#63: ## %cond.load91
+; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6
+; AVX2-NEXT: vpinsrw $7, 62(%rdi), %xmm6, %xmm6
+; AVX2-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX2-NEXT: LBB55_64: ## %else92
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
+; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
+; AVX2-NEXT: vpblendvb %ymm0, %ymm3, %ymm1, %ymm0
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
+; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
+; AVX2-NEXT: vpsraw $15, %ymm1, %ymm1
+; AVX2-NEXT: vpblendvb %ymm1, %ymm5, %ymm2, %ymm1
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_mask_load_32xi16:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_2
+; AVX512F-NEXT: ## BB#1: ## %cond.load
+; AVX512F-NEXT: movzwl (%rdi), %eax
+; AVX512F-NEXT: vmovd %eax, %xmm3
+; AVX512F-NEXT: LBB55_2: ## %else
+; AVX512F-NEXT: vpextrb $1, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_4
+; AVX512F-NEXT: ## BB#3: ## %cond.load1
+; AVX512F-NEXT: vpinsrw $1, 2(%rdi), %xmm3, %xmm4
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX512F-NEXT: LBB55_4: ## %else2
+; AVX512F-NEXT: vpextrb $2, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_6
+; AVX512F-NEXT: ## BB#5: ## %cond.load4
+; AVX512F-NEXT: vpinsrw $2, 4(%rdi), %xmm3, %xmm4
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX512F-NEXT: LBB55_6: ## %else5
+; AVX512F-NEXT: vpextrb $3, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_8
+; AVX512F-NEXT: ## BB#7: ## %cond.load7
+; AVX512F-NEXT: vpinsrw $3, 6(%rdi), %xmm3, %xmm4
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX512F-NEXT: LBB55_8: ## %else8
+; AVX512F-NEXT: vpextrb $4, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_10
+; AVX512F-NEXT: ## BB#9: ## %cond.load10
+; AVX512F-NEXT: vpinsrw $4, 8(%rdi), %xmm3, %xmm4
+; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7]
+; AVX512F-NEXT: LBB55_10: ## %else11
+; AVX512F-NEXT: vpextrb $5, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_12
+; AVX512F-NEXT: ## BB#11: ## %cond.load13
+; AVX512F-NEXT: vpinsrw $5, 10(%rdi), %xmm3, %xmm4
+; AVX512F-NEXT:
vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX512F-NEXT: LBB55_12: ## %else14 +; AVX512F-NEXT: vpextrb $6, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_14 +; AVX512F-NEXT: ## BB#13: ## %cond.load16 +; AVX512F-NEXT: vpinsrw $6, 12(%rdi), %xmm3, %xmm4 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX512F-NEXT: LBB55_14: ## %else17 +; AVX512F-NEXT: vpextrb $7, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_16 +; AVX512F-NEXT: ## BB#15: ## %cond.load19 +; AVX512F-NEXT: vpinsrw $7, 14(%rdi), %xmm3, %xmm4 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm3 = ymm4[0,1,2,3],ymm3[4,5,6,7] +; AVX512F-NEXT: LBB55_16: ## %else20 +; AVX512F-NEXT: vpextrb $8, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_18 +; AVX512F-NEXT: ## BB#17: ## %cond.load22 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpinsrw $0, 16(%rdi), %xmm4, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: LBB55_18: ## %else23 +; AVX512F-NEXT: vpextrb $9, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_20 +; AVX512F-NEXT: ## BB#19: ## %cond.load25 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpinsrw $1, 18(%rdi), %xmm4, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: LBB55_20: ## %else26 +; AVX512F-NEXT: vpextrb $10, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_22 +; AVX512F-NEXT: ## BB#21: ## %cond.load28 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpinsrw $2, 20(%rdi), %xmm4, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: LBB55_22: ## %else29 +; AVX512F-NEXT: vpextrb $11, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_24 +; AVX512F-NEXT: ## BB#23: ## %cond.load31 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpinsrw $3, 22(%rdi), %xmm4, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: LBB55_24: ## %else32 +; AVX512F-NEXT: vpextrb $12, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_26 +; AVX512F-NEXT: ## BB#25: ## %cond.load34 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpinsrw $4, 24(%rdi), %xmm4, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: LBB55_26: ## %else35 +; AVX512F-NEXT: vpextrb $13, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_28 +; AVX512F-NEXT: ## BB#27: ## %cond.load37 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpinsrw $5, 26(%rdi), %xmm4, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: LBB55_28: ## %else38 +; AVX512F-NEXT: vpextrb $14, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_30 +; AVX512F-NEXT: ## BB#29: ## %cond.load40 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpinsrw $6, 28(%rdi), %xmm4, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: LBB55_30: ## %else41 +; AVX512F-NEXT: vpextrb $15, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_32 +; AVX512F-NEXT: ## BB#31: ## %cond.load43 +; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4 +; AVX512F-NEXT: vpinsrw $7, 30(%rdi), %xmm4, %xmm4 +; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; AVX512F-NEXT: LBB55_32: ## %else44 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4 +; AVX512F-NEXT: vpextrb $0, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_34 +; AVX512F-NEXT: ## BB#33: ## 
%cond.load46 +; AVX512F-NEXT: vpinsrw $0, 32(%rdi), %xmm0, %xmm5 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm5[0,1,2,3],ymm0[4,5,6,7] +; AVX512F-NEXT: LBB55_34: ## %else47 +; AVX512F-NEXT: vpextrb $1, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_36 +; AVX512F-NEXT: ## BB#35: ## %cond.load49 +; AVX512F-NEXT: vpinsrw $1, 34(%rdi), %xmm5, %xmm6 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX512F-NEXT: LBB55_36: ## %else50 +; AVX512F-NEXT: vpextrb $2, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_38 +; AVX512F-NEXT: ## BB#37: ## %cond.load52 +; AVX512F-NEXT: vpinsrw $2, 36(%rdi), %xmm5, %xmm6 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX512F-NEXT: LBB55_38: ## %else53 +; AVX512F-NEXT: vpextrb $3, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_40 +; AVX512F-NEXT: ## BB#39: ## %cond.load55 +; AVX512F-NEXT: vpinsrw $3, 38(%rdi), %xmm5, %xmm6 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX512F-NEXT: LBB55_40: ## %else56 +; AVX512F-NEXT: vpextrb $4, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_42 +; AVX512F-NEXT: ## BB#41: ## %cond.load58 +; AVX512F-NEXT: vpinsrw $4, 40(%rdi), %xmm5, %xmm6 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX512F-NEXT: LBB55_42: ## %else59 +; AVX512F-NEXT: vpextrb $5, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_44 +; AVX512F-NEXT: ## BB#43: ## %cond.load61 +; AVX512F-NEXT: vpinsrw $5, 42(%rdi), %xmm5, %xmm6 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX512F-NEXT: LBB55_44: ## %else62 +; AVX512F-NEXT: vpextrb $6, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_46 +; AVX512F-NEXT: ## BB#45: ## %cond.load64 +; AVX512F-NEXT: vpinsrw $6, 44(%rdi), %xmm5, %xmm6 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX512F-NEXT: LBB55_46: ## %else65 +; AVX512F-NEXT: vpextrb $7, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_48 +; AVX512F-NEXT: ## BB#47: ## %cond.load67 +; AVX512F-NEXT: vpinsrw $7, 46(%rdi), %xmm5, %xmm6 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7] +; AVX512F-NEXT: LBB55_48: ## %else68 +; AVX512F-NEXT: vpextrb $8, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_50 +; AVX512F-NEXT: ## BB#49: ## %cond.load70 +; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX512F-NEXT: vpinsrw $0, 48(%rdi), %xmm6, %xmm6 +; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 +; AVX512F-NEXT: LBB55_50: ## %else71 +; AVX512F-NEXT: vpextrb $9, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_52 +; AVX512F-NEXT: ## BB#51: ## %cond.load73 +; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX512F-NEXT: vpinsrw $1, 50(%rdi), %xmm6, %xmm6 +; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 +; AVX512F-NEXT: LBB55_52: ## %else74 +; AVX512F-NEXT: vpextrb $10, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_54 +; AVX512F-NEXT: ## BB#53: ## %cond.load76 +; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX512F-NEXT: vpinsrw $2, 52(%rdi), %xmm6, %xmm6 +; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5 +; AVX512F-NEXT: LBB55_54: ## %else77 +; AVX512F-NEXT: vpextrb $11, %xmm4, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB55_56 +; AVX512F-NEXT: ## BB#55: ## %cond.load79 +; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6 +; AVX512F-NEXT: vpinsrw $3, 54(%rdi), %xmm6, %xmm6 +; 
AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX512F-NEXT: LBB55_56: ## %else80
+; AVX512F-NEXT: vpextrb $12, %xmm4, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_58
+; AVX512F-NEXT: ## BB#57: ## %cond.load82
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
+; AVX512F-NEXT: vpinsrw $4, 56(%rdi), %xmm6, %xmm6
+; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX512F-NEXT: LBB55_58: ## %else83
+; AVX512F-NEXT: vpextrb $13, %xmm4, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_60
+; AVX512F-NEXT: ## BB#59: ## %cond.load85
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
+; AVX512F-NEXT: vpinsrw $5, 58(%rdi), %xmm6, %xmm6
+; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX512F-NEXT: LBB55_60: ## %else86
+; AVX512F-NEXT: vpextrb $14, %xmm4, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_62
+; AVX512F-NEXT: ## BB#61: ## %cond.load88
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
+; AVX512F-NEXT: vpinsrw $6, 60(%rdi), %xmm6, %xmm6
+; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX512F-NEXT: LBB55_62: ## %else89
+; AVX512F-NEXT: vpextrb $15, %xmm4, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB55_64
+; AVX512F-NEXT: ## BB#63: ## %cond.load91
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm6
+; AVX512F-NEXT: vpinsrw $7, 62(%rdi), %xmm6, %xmm6
+; AVX512F-NEXT: vinserti128 $1, %xmm6, %ymm5, %ymm5
+; AVX512F-NEXT: LBB55_64: ## %else92
+; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512F-NEXT: vpsllw $15, %ymm0, %ymm0
+; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm0
+; AVX512F-NEXT: vpblendvb %ymm0, %ymm3, %ymm1, %ymm0
+; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
+; AVX512F-NEXT: vpsllw $15, %ymm1, %ymm1
+; AVX512F-NEXT: vpsraw $15, %ymm1, %ymm1
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm5, %ymm2, %ymm1
+; AVX512F-NEXT: retq
+;
; SKX-LABEL: test_mask_load_32xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
@@ -2295,6 +6917,241 @@ define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32
declare <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>*, i32, <32 x i1>, <32 x i16>)
define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8> %val) {
+; AVX-LABEL: test_mask_store_16xi8:
+; AVX: ## BB#0:
+; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB56_2
+; AVX-NEXT: ## BB#1: ## %cond.store
+; AVX-NEXT: vpextrb $0, %xmm1, (%rdi)
+; AVX-NEXT: LBB56_2: ## %else
+; AVX-NEXT: vpextrb $1, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB56_4
+; AVX-NEXT: ## BB#3: ## %cond.store1
+; AVX-NEXT: vpextrb $1, %xmm1, 1(%rdi)
+; AVX-NEXT: LBB56_4: ## %else2
+; AVX-NEXT: vpextrb $2, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB56_6
+; AVX-NEXT: ## BB#5: ## %cond.store3
+; AVX-NEXT: vpextrb $2, %xmm1, 2(%rdi)
+; AVX-NEXT: LBB56_6: ## %else4
+; AVX-NEXT: vpextrb $3, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB56_8
+; AVX-NEXT: ## BB#7: ## %cond.store5
+; AVX-NEXT: vpextrb $3, %xmm1, 3(%rdi)
+; AVX-NEXT: LBB56_8: ## %else6
+; AVX-NEXT: vpextrb $4, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB56_10
+;
AVX-NEXT: ## BB#9: ## %cond.store7 +; AVX-NEXT: vpextrb $4, %xmm1, 4(%rdi) +; AVX-NEXT: LBB56_10: ## %else8 +; AVX-NEXT: vpextrb $5, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_12 +; AVX-NEXT: ## BB#11: ## %cond.store9 +; AVX-NEXT: vpextrb $5, %xmm1, 5(%rdi) +; AVX-NEXT: LBB56_12: ## %else10 +; AVX-NEXT: vpextrb $6, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_14 +; AVX-NEXT: ## BB#13: ## %cond.store11 +; AVX-NEXT: vpextrb $6, %xmm1, 6(%rdi) +; AVX-NEXT: LBB56_14: ## %else12 +; AVX-NEXT: vpextrb $7, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_16 +; AVX-NEXT: ## BB#15: ## %cond.store13 +; AVX-NEXT: vpextrb $7, %xmm1, 7(%rdi) +; AVX-NEXT: LBB56_16: ## %else14 +; AVX-NEXT: vpextrb $8, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_18 +; AVX-NEXT: ## BB#17: ## %cond.store15 +; AVX-NEXT: vpextrb $8, %xmm1, 8(%rdi) +; AVX-NEXT: LBB56_18: ## %else16 +; AVX-NEXT: vpextrb $9, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_20 +; AVX-NEXT: ## BB#19: ## %cond.store17 +; AVX-NEXT: vpextrb $9, %xmm1, 9(%rdi) +; AVX-NEXT: LBB56_20: ## %else18 +; AVX-NEXT: vpextrb $10, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_22 +; AVX-NEXT: ## BB#21: ## %cond.store19 +; AVX-NEXT: vpextrb $10, %xmm1, 10(%rdi) +; AVX-NEXT: LBB56_22: ## %else20 +; AVX-NEXT: vpextrb $11, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_24 +; AVX-NEXT: ## BB#23: ## %cond.store21 +; AVX-NEXT: vpextrb $11, %xmm1, 11(%rdi) +; AVX-NEXT: LBB56_24: ## %else22 +; AVX-NEXT: vpextrb $12, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_26 +; AVX-NEXT: ## BB#25: ## %cond.store23 +; AVX-NEXT: vpextrb $12, %xmm1, 12(%rdi) +; AVX-NEXT: LBB56_26: ## %else24 +; AVX-NEXT: vpextrb $13, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_28 +; AVX-NEXT: ## BB#27: ## %cond.store25 +; AVX-NEXT: vpextrb $13, %xmm1, 13(%rdi) +; AVX-NEXT: LBB56_28: ## %else26 +; AVX-NEXT: vpextrb $14, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_30 +; AVX-NEXT: ## BB#29: ## %cond.store27 +; AVX-NEXT: vpextrb $14, %xmm1, 14(%rdi) +; AVX-NEXT: LBB56_30: ## %else28 +; AVX-NEXT: vpextrb $15, %xmm0, %eax +; AVX-NEXT: testb $1, %al +; AVX-NEXT: je LBB56_32 +; AVX-NEXT: ## BB#31: ## %cond.store29 +; AVX-NEXT: vpextrb $15, %xmm1, 15(%rdi) +; AVX-NEXT: LBB56_32: ## %else30 +; AVX-NEXT: retq +; +; AVX512F-LABEL: test_mask_store_16xi8: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_2 +; AVX512F-NEXT: ## BB#1: ## %cond.store +; AVX512F-NEXT: vpextrb $0, %xmm1, (%rdi) +; AVX512F-NEXT: LBB56_2: ## %else +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_4 +; AVX512F-NEXT: ## BB#3: ## %cond.store1 +; AVX512F-NEXT: vpextrb $1, %xmm1, 1(%rdi) +; AVX512F-NEXT: LBB56_4: ## %else2 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_6 +; AVX512F-NEXT: ## BB#5: ## %cond.store3 +; AVX512F-NEXT: vpextrb $2, %xmm1, 2(%rdi) +; AVX512F-NEXT: LBB56_6: ## %else4 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw 
%k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_8 +; AVX512F-NEXT: ## BB#7: ## %cond.store5 +; AVX512F-NEXT: vpextrb $3, %xmm1, 3(%rdi) +; AVX512F-NEXT: LBB56_8: ## %else6 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_10 +; AVX512F-NEXT: ## BB#9: ## %cond.store7 +; AVX512F-NEXT: vpextrb $4, %xmm1, 4(%rdi) +; AVX512F-NEXT: LBB56_10: ## %else8 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_12 +; AVX512F-NEXT: ## BB#11: ## %cond.store9 +; AVX512F-NEXT: vpextrb $5, %xmm1, 5(%rdi) +; AVX512F-NEXT: LBB56_12: ## %else10 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_14 +; AVX512F-NEXT: ## BB#13: ## %cond.store11 +; AVX512F-NEXT: vpextrb $6, %xmm1, 6(%rdi) +; AVX512F-NEXT: LBB56_14: ## %else12 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_16 +; AVX512F-NEXT: ## BB#15: ## %cond.store13 +; AVX512F-NEXT: vpextrb $7, %xmm1, 7(%rdi) +; AVX512F-NEXT: LBB56_16: ## %else14 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_18 +; AVX512F-NEXT: ## BB#17: ## %cond.store15 +; AVX512F-NEXT: vpextrb $8, %xmm1, 8(%rdi) +; AVX512F-NEXT: LBB56_18: ## %else16 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_20 +; AVX512F-NEXT: ## BB#19: ## %cond.store17 +; AVX512F-NEXT: vpextrb $9, %xmm1, 9(%rdi) +; AVX512F-NEXT: LBB56_20: ## %else18 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_22 +; AVX512F-NEXT: ## BB#21: ## %cond.store19 +; AVX512F-NEXT: vpextrb $10, %xmm1, 10(%rdi) +; AVX512F-NEXT: LBB56_22: ## %else20 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_24 +; AVX512F-NEXT: ## BB#23: ## %cond.store21 +; AVX512F-NEXT: vpextrb $11, %xmm1, 11(%rdi) +; AVX512F-NEXT: LBB56_24: ## %else22 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_26 +; AVX512F-NEXT: ## BB#25: ## %cond.store23 +; AVX512F-NEXT: vpextrb $12, %xmm1, 12(%rdi) +; AVX512F-NEXT: LBB56_26: ## %else24 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_28 +; AVX512F-NEXT: ## BB#27: ## %cond.store25 +; AVX512F-NEXT: vpextrb $13, %xmm1, 13(%rdi) +; AVX512F-NEXT: LBB56_28: ## %else26 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB56_30 +; AVX512F-NEXT: ## BB#29: ## %cond.store27 +; AVX512F-NEXT: vpextrb $14, %xmm1, 14(%rdi) +; AVX512F-NEXT: LBB56_30: ## %else28 +; AVX512F-NEXT: kshiftlw $0, %k0, %k0 +; AVX512F-NEXT: kshiftrw 
$15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB56_32
+; AVX512F-NEXT: ## BB#31: ## %cond.store29
+; AVX512F-NEXT: vpextrb $15, %xmm1, 15(%rdi)
+; AVX512F-NEXT: LBB56_32: ## %else30
+; AVX512F-NEXT: retq
+;
; SKX-LABEL: test_mask_store_16xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
@@ -2307,6 +7164,647 @@ define void @test_mask_store_16xi8(<16 x i1> %mask, <16 x i8>* %addr, <16 x i8>
declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32, <16 x i1>)
define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8> %val) {
+; AVX1-LABEL: test_mask_store_32xi8:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_2
+; AVX1-NEXT: ## BB#1: ## %cond.store
+; AVX1-NEXT: vpextrb $0, %xmm1, (%rdi)
+; AVX1-NEXT: LBB57_2: ## %else
+; AVX1-NEXT: vpextrb $1, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_4
+; AVX1-NEXT: ## BB#3: ## %cond.store1
+; AVX1-NEXT: vpextrb $1, %xmm1, 1(%rdi)
+; AVX1-NEXT: LBB57_4: ## %else2
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_6
+; AVX1-NEXT: ## BB#5: ## %cond.store3
+; AVX1-NEXT: vpextrb $2, %xmm1, 2(%rdi)
+; AVX1-NEXT: LBB57_6: ## %else4
+; AVX1-NEXT: vpextrb $3, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_8
+; AVX1-NEXT: ## BB#7: ## %cond.store5
+; AVX1-NEXT: vpextrb $3, %xmm1, 3(%rdi)
+; AVX1-NEXT: LBB57_8: ## %else6
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_10
+; AVX1-NEXT: ## BB#9: ## %cond.store7
+; AVX1-NEXT: vpextrb $4, %xmm1, 4(%rdi)
+; AVX1-NEXT: LBB57_10: ## %else8
+; AVX1-NEXT: vpextrb $5, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_12
+; AVX1-NEXT: ## BB#11: ## %cond.store9
+; AVX1-NEXT: vpextrb $5, %xmm1, 5(%rdi)
+; AVX1-NEXT: LBB57_12: ## %else10
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_14
+; AVX1-NEXT: ## BB#13: ## %cond.store11
+; AVX1-NEXT: vpextrb $6, %xmm1, 6(%rdi)
+; AVX1-NEXT: LBB57_14: ## %else12
+; AVX1-NEXT: vpextrb $7, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_16
+; AVX1-NEXT: ## BB#15: ## %cond.store13
+; AVX1-NEXT: vpextrb $7, %xmm1, 7(%rdi)
+; AVX1-NEXT: LBB57_16: ## %else14
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_18
+; AVX1-NEXT: ## BB#17: ## %cond.store15
+; AVX1-NEXT: vpextrb $8, %xmm1, 8(%rdi)
+; AVX1-NEXT: LBB57_18: ## %else16
+; AVX1-NEXT: vpextrb $9, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_20
+; AVX1-NEXT: ## BB#19: ## %cond.store17
+; AVX1-NEXT: vpextrb $9, %xmm1, 9(%rdi)
+; AVX1-NEXT: LBB57_20: ## %else18
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_22
+; AVX1-NEXT: ## BB#21: ## %cond.store19
+; AVX1-NEXT: vpextrb $10, %xmm1, 10(%rdi)
+; AVX1-NEXT: LBB57_22: ## %else20
+; AVX1-NEXT: vpextrb $11, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_24
+; AVX1-NEXT: ## BB#23: ## %cond.store21
+; AVX1-NEXT: vpextrb $11, %xmm1, 11(%rdi)
+; AVX1-NEXT: LBB57_24: ## %else22
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_26
+; AVX1-NEXT: ## BB#25: ## %cond.store23
+; AVX1-NEXT: vpextrb $12, %xmm1, 12(%rdi)
+; AVX1-NEXT: LBB57_26: ## %else24
+; AVX1-NEXT: vpextrb $13, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB57_28
+; AVX1-NEXT: ## BB#27: ## %cond.store25
+; AVX1-NEXT: vpextrb $13, %xmm1,
13(%rdi) +; AVX1-NEXT: LBB57_28: ## %else26 +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_30 +; AVX1-NEXT: ## BB#29: ## %cond.store27 +; AVX1-NEXT: vpextrb $14, %xmm1, 14(%rdi) +; AVX1-NEXT: LBB57_30: ## %else28 +; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_32 +; AVX1-NEXT: ## BB#31: ## %cond.store29 +; AVX1-NEXT: vpextrb $15, %xmm1, 15(%rdi) +; AVX1-NEXT: LBB57_32: ## %else30 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_34 +; AVX1-NEXT: ## BB#33: ## %cond.store31 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $0, %xmm2, 16(%rdi) +; AVX1-NEXT: LBB57_34: ## %else32 +; AVX1-NEXT: vpextrb $1, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_36 +; AVX1-NEXT: ## BB#35: ## %cond.store33 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $1, %xmm2, 17(%rdi) +; AVX1-NEXT: LBB57_36: ## %else34 +; AVX1-NEXT: vpextrb $2, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_38 +; AVX1-NEXT: ## BB#37: ## %cond.store35 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $2, %xmm2, 18(%rdi) +; AVX1-NEXT: LBB57_38: ## %else36 +; AVX1-NEXT: vpextrb $3, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_40 +; AVX1-NEXT: ## BB#39: ## %cond.store37 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $3, %xmm2, 19(%rdi) +; AVX1-NEXT: LBB57_40: ## %else38 +; AVX1-NEXT: vpextrb $4, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_42 +; AVX1-NEXT: ## BB#41: ## %cond.store39 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $4, %xmm2, 20(%rdi) +; AVX1-NEXT: LBB57_42: ## %else40 +; AVX1-NEXT: vpextrb $5, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_44 +; AVX1-NEXT: ## BB#43: ## %cond.store41 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $5, %xmm2, 21(%rdi) +; AVX1-NEXT: LBB57_44: ## %else42 +; AVX1-NEXT: vpextrb $6, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_46 +; AVX1-NEXT: ## BB#45: ## %cond.store43 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $6, %xmm2, 22(%rdi) +; AVX1-NEXT: LBB57_46: ## %else44 +; AVX1-NEXT: vpextrb $7, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_48 +; AVX1-NEXT: ## BB#47: ## %cond.store45 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $7, %xmm2, 23(%rdi) +; AVX1-NEXT: LBB57_48: ## %else46 +; AVX1-NEXT: vpextrb $8, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_50 +; AVX1-NEXT: ## BB#49: ## %cond.store47 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $8, %xmm2, 24(%rdi) +; AVX1-NEXT: LBB57_50: ## %else48 +; AVX1-NEXT: vpextrb $9, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_52 +; AVX1-NEXT: ## BB#51: ## %cond.store49 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $9, %xmm2, 25(%rdi) +; AVX1-NEXT: LBB57_52: ## %else50 +; AVX1-NEXT: vpextrb $10, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_54 +; AVX1-NEXT: ## BB#53: ## %cond.store51 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $10, %xmm2, 26(%rdi) +; AVX1-NEXT: LBB57_54: ## %else52 +; AVX1-NEXT: vpextrb $11, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_56 +; AVX1-NEXT: ## BB#55: ## %cond.store53 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $11, %xmm2, 27(%rdi) +; AVX1-NEXT: LBB57_56: 
## %else54 +; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_58 +; AVX1-NEXT: ## BB#57: ## %cond.store55 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $12, %xmm2, 28(%rdi) +; AVX1-NEXT: LBB57_58: ## %else56 +; AVX1-NEXT: vpextrb $13, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_60 +; AVX1-NEXT: ## BB#59: ## %cond.store57 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $13, %xmm2, 29(%rdi) +; AVX1-NEXT: LBB57_60: ## %else58 +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_62 +; AVX1-NEXT: ## BB#61: ## %cond.store59 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpextrb $14, %xmm2, 30(%rdi) +; AVX1-NEXT: LBB57_62: ## %else60 +; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB57_64 +; AVX1-NEXT: ## BB#63: ## %cond.store61 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 +; AVX1-NEXT: vpextrb $15, %xmm0, 31(%rdi) +; AVX1-NEXT: LBB57_64: ## %else62 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_mask_store_32xi8: +; AVX2: ## BB#0: +; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_2 +; AVX2-NEXT: ## BB#1: ## %cond.store +; AVX2-NEXT: vpextrb $0, %xmm1, (%rdi) +; AVX2-NEXT: LBB57_2: ## %else +; AVX2-NEXT: vpextrb $1, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_4 +; AVX2-NEXT: ## BB#3: ## %cond.store1 +; AVX2-NEXT: vpextrb $1, %xmm1, 1(%rdi) +; AVX2-NEXT: LBB57_4: ## %else2 +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_6 +; AVX2-NEXT: ## BB#5: ## %cond.store3 +; AVX2-NEXT: vpextrb $2, %xmm1, 2(%rdi) +; AVX2-NEXT: LBB57_6: ## %else4 +; AVX2-NEXT: vpextrb $3, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_8 +; AVX2-NEXT: ## BB#7: ## %cond.store5 +; AVX2-NEXT: vpextrb $3, %xmm1, 3(%rdi) +; AVX2-NEXT: LBB57_8: ## %else6 +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_10 +; AVX2-NEXT: ## BB#9: ## %cond.store7 +; AVX2-NEXT: vpextrb $4, %xmm1, 4(%rdi) +; AVX2-NEXT: LBB57_10: ## %else8 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_12 +; AVX2-NEXT: ## BB#11: ## %cond.store9 +; AVX2-NEXT: vpextrb $5, %xmm1, 5(%rdi) +; AVX2-NEXT: LBB57_12: ## %else10 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_14 +; AVX2-NEXT: ## BB#13: ## %cond.store11 +; AVX2-NEXT: vpextrb $6, %xmm1, 6(%rdi) +; AVX2-NEXT: LBB57_14: ## %else12 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_16 +; AVX2-NEXT: ## BB#15: ## %cond.store13 +; AVX2-NEXT: vpextrb $7, %xmm1, 7(%rdi) +; AVX2-NEXT: LBB57_16: ## %else14 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_18 +; AVX2-NEXT: ## BB#17: ## %cond.store15 +; AVX2-NEXT: vpextrb $8, %xmm1, 8(%rdi) +; AVX2-NEXT: LBB57_18: ## %else16 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_20 +; AVX2-NEXT: ## BB#19: ## %cond.store17 +; AVX2-NEXT: vpextrb $9, %xmm1, 9(%rdi) +; AVX2-NEXT: LBB57_20: ## %else18 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_22 +; AVX2-NEXT: ## BB#21: ## %cond.store19 +; AVX2-NEXT: vpextrb $10, %xmm1, 10(%rdi) +; AVX2-NEXT: LBB57_22: ## %else20 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_24 +; AVX2-NEXT: ## BB#23: ## %cond.store21 +; 
AVX2-NEXT: vpextrb $11, %xmm1, 11(%rdi) +; AVX2-NEXT: LBB57_24: ## %else22 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_26 +; AVX2-NEXT: ## BB#25: ## %cond.store23 +; AVX2-NEXT: vpextrb $12, %xmm1, 12(%rdi) +; AVX2-NEXT: LBB57_26: ## %else24 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_28 +; AVX2-NEXT: ## BB#27: ## %cond.store25 +; AVX2-NEXT: vpextrb $13, %xmm1, 13(%rdi) +; AVX2-NEXT: LBB57_28: ## %else26 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_30 +; AVX2-NEXT: ## BB#29: ## %cond.store27 +; AVX2-NEXT: vpextrb $14, %xmm1, 14(%rdi) +; AVX2-NEXT: LBB57_30: ## %else28 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_32 +; AVX2-NEXT: ## BB#31: ## %cond.store29 +; AVX2-NEXT: vpextrb $15, %xmm1, 15(%rdi) +; AVX2-NEXT: LBB57_32: ## %else30 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_34 +; AVX2-NEXT: ## BB#33: ## %cond.store31 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $0, %xmm2, 16(%rdi) +; AVX2-NEXT: LBB57_34: ## %else32 +; AVX2-NEXT: vpextrb $1, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_36 +; AVX2-NEXT: ## BB#35: ## %cond.store33 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $1, %xmm2, 17(%rdi) +; AVX2-NEXT: LBB57_36: ## %else34 +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_38 +; AVX2-NEXT: ## BB#37: ## %cond.store35 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $2, %xmm2, 18(%rdi) +; AVX2-NEXT: LBB57_38: ## %else36 +; AVX2-NEXT: vpextrb $3, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_40 +; AVX2-NEXT: ## BB#39: ## %cond.store37 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $3, %xmm2, 19(%rdi) +; AVX2-NEXT: LBB57_40: ## %else38 +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_42 +; AVX2-NEXT: ## BB#41: ## %cond.store39 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $4, %xmm2, 20(%rdi) +; AVX2-NEXT: LBB57_42: ## %else40 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_44 +; AVX2-NEXT: ## BB#43: ## %cond.store41 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $5, %xmm2, 21(%rdi) +; AVX2-NEXT: LBB57_44: ## %else42 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_46 +; AVX2-NEXT: ## BB#45: ## %cond.store43 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $6, %xmm2, 22(%rdi) +; AVX2-NEXT: LBB57_46: ## %else44 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_48 +; AVX2-NEXT: ## BB#47: ## %cond.store45 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $7, %xmm2, 23(%rdi) +; AVX2-NEXT: LBB57_48: ## %else46 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_50 +; AVX2-NEXT: ## BB#49: ## %cond.store47 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $8, %xmm2, 24(%rdi) +; AVX2-NEXT: LBB57_50: ## %else48 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_52 +; AVX2-NEXT: ## BB#51: ## %cond.store49 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $9, %xmm2, 25(%rdi) +; AVX2-NEXT: LBB57_52: ## %else50 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; 
AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_54 +; AVX2-NEXT: ## BB#53: ## %cond.store51 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $10, %xmm2, 26(%rdi) +; AVX2-NEXT: LBB57_54: ## %else52 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_56 +; AVX2-NEXT: ## BB#55: ## %cond.store53 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $11, %xmm2, 27(%rdi) +; AVX2-NEXT: LBB57_56: ## %else54 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_58 +; AVX2-NEXT: ## BB#57: ## %cond.store55 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $12, %xmm2, 28(%rdi) +; AVX2-NEXT: LBB57_58: ## %else56 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_60 +; AVX2-NEXT: ## BB#59: ## %cond.store57 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $13, %xmm2, 29(%rdi) +; AVX2-NEXT: LBB57_60: ## %else58 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_62 +; AVX2-NEXT: ## BB#61: ## %cond.store59 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX2-NEXT: vpextrb $14, %xmm2, 30(%rdi) +; AVX2-NEXT: LBB57_62: ## %else60 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB57_64 +; AVX2-NEXT: ## BB#63: ## %cond.store61 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX2-NEXT: vpextrb $15, %xmm0, 31(%rdi) +; AVX2-NEXT: LBB57_64: ## %else62 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: test_mask_store_32xi8: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpextrb $0, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_2 +; AVX512F-NEXT: ## BB#1: ## %cond.store +; AVX512F-NEXT: vpextrb $0, %xmm1, (%rdi) +; AVX512F-NEXT: LBB57_2: ## %else +; AVX512F-NEXT: vpextrb $1, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_4 +; AVX512F-NEXT: ## BB#3: ## %cond.store1 +; AVX512F-NEXT: vpextrb $1, %xmm1, 1(%rdi) +; AVX512F-NEXT: LBB57_4: ## %else2 +; AVX512F-NEXT: vpextrb $2, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_6 +; AVX512F-NEXT: ## BB#5: ## %cond.store3 +; AVX512F-NEXT: vpextrb $2, %xmm1, 2(%rdi) +; AVX512F-NEXT: LBB57_6: ## %else4 +; AVX512F-NEXT: vpextrb $3, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_8 +; AVX512F-NEXT: ## BB#7: ## %cond.store5 +; AVX512F-NEXT: vpextrb $3, %xmm1, 3(%rdi) +; AVX512F-NEXT: LBB57_8: ## %else6 +; AVX512F-NEXT: vpextrb $4, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_10 +; AVX512F-NEXT: ## BB#9: ## %cond.store7 +; AVX512F-NEXT: vpextrb $4, %xmm1, 4(%rdi) +; AVX512F-NEXT: LBB57_10: ## %else8 +; AVX512F-NEXT: vpextrb $5, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_12 +; AVX512F-NEXT: ## BB#11: ## %cond.store9 +; AVX512F-NEXT: vpextrb $5, %xmm1, 5(%rdi) +; AVX512F-NEXT: LBB57_12: ## %else10 +; AVX512F-NEXT: vpextrb $6, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_14 +; AVX512F-NEXT: ## BB#13: ## %cond.store11 +; AVX512F-NEXT: vpextrb $6, %xmm1, 6(%rdi) +; AVX512F-NEXT: LBB57_14: ## %else12 +; AVX512F-NEXT: vpextrb $7, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_16 +; AVX512F-NEXT: ## BB#15: ## %cond.store13 +; AVX512F-NEXT: vpextrb $7, %xmm1, 7(%rdi) +; AVX512F-NEXT: LBB57_16: ## %else14 +; AVX512F-NEXT: vpextrb $8, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_18 +; AVX512F-NEXT: ## BB#17: ## %cond.store15 +; AVX512F-NEXT: 
vpextrb $8, %xmm1, 8(%rdi) +; AVX512F-NEXT: LBB57_18: ## %else16 +; AVX512F-NEXT: vpextrb $9, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_20 +; AVX512F-NEXT: ## BB#19: ## %cond.store17 +; AVX512F-NEXT: vpextrb $9, %xmm1, 9(%rdi) +; AVX512F-NEXT: LBB57_20: ## %else18 +; AVX512F-NEXT: vpextrb $10, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_22 +; AVX512F-NEXT: ## BB#21: ## %cond.store19 +; AVX512F-NEXT: vpextrb $10, %xmm1, 10(%rdi) +; AVX512F-NEXT: LBB57_22: ## %else20 +; AVX512F-NEXT: vpextrb $11, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_24 +; AVX512F-NEXT: ## BB#23: ## %cond.store21 +; AVX512F-NEXT: vpextrb $11, %xmm1, 11(%rdi) +; AVX512F-NEXT: LBB57_24: ## %else22 +; AVX512F-NEXT: vpextrb $12, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_26 +; AVX512F-NEXT: ## BB#25: ## %cond.store23 +; AVX512F-NEXT: vpextrb $12, %xmm1, 12(%rdi) +; AVX512F-NEXT: LBB57_26: ## %else24 +; AVX512F-NEXT: vpextrb $13, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_28 +; AVX512F-NEXT: ## BB#27: ## %cond.store25 +; AVX512F-NEXT: vpextrb $13, %xmm1, 13(%rdi) +; AVX512F-NEXT: LBB57_28: ## %else26 +; AVX512F-NEXT: vpextrb $14, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_30 +; AVX512F-NEXT: ## BB#29: ## %cond.store27 +; AVX512F-NEXT: vpextrb $14, %xmm1, 14(%rdi) +; AVX512F-NEXT: LBB57_30: ## %else28 +; AVX512F-NEXT: vpextrb $15, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_32 +; AVX512F-NEXT: ## BB#31: ## %cond.store29 +; AVX512F-NEXT: vpextrb $15, %xmm1, 15(%rdi) +; AVX512F-NEXT: LBB57_32: ## %else30 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpextrb $0, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_34 +; AVX512F-NEXT: ## BB#33: ## %cond.store31 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpextrb $0, %xmm2, 16(%rdi) +; AVX512F-NEXT: LBB57_34: ## %else32 +; AVX512F-NEXT: vpextrb $1, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_36 +; AVX512F-NEXT: ## BB#35: ## %cond.store33 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpextrb $1, %xmm2, 17(%rdi) +; AVX512F-NEXT: LBB57_36: ## %else34 +; AVX512F-NEXT: vpextrb $2, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_38 +; AVX512F-NEXT: ## BB#37: ## %cond.store35 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpextrb $2, %xmm2, 18(%rdi) +; AVX512F-NEXT: LBB57_38: ## %else36 +; AVX512F-NEXT: vpextrb $3, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_40 +; AVX512F-NEXT: ## BB#39: ## %cond.store37 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpextrb $3, %xmm2, 19(%rdi) +; AVX512F-NEXT: LBB57_40: ## %else38 +; AVX512F-NEXT: vpextrb $4, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_42 +; AVX512F-NEXT: ## BB#41: ## %cond.store39 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpextrb $4, %xmm2, 20(%rdi) +; AVX512F-NEXT: LBB57_42: ## %else40 +; AVX512F-NEXT: vpextrb $5, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_44 +; AVX512F-NEXT: ## BB#43: ## %cond.store41 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 +; AVX512F-NEXT: vpextrb $5, %xmm2, 21(%rdi) +; AVX512F-NEXT: LBB57_44: ## %else42 +; AVX512F-NEXT: vpextrb $6, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB57_46 +; AVX512F-NEXT: ## BB#45: ## %cond.store43 +; AVX512F-NEXT: vextracti128 $1, %ymm1, 
%xmm2
+; AVX512F-NEXT: vpextrb $6, %xmm2, 22(%rdi)
+; AVX512F-NEXT: LBB57_46: ## %else44
+; AVX512F-NEXT: vpextrb $7, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_48
+; AVX512F-NEXT: ## BB#47: ## %cond.store45
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpextrb $7, %xmm2, 23(%rdi)
+; AVX512F-NEXT: LBB57_48: ## %else46
+; AVX512F-NEXT: vpextrb $8, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_50
+; AVX512F-NEXT: ## BB#49: ## %cond.store47
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpextrb $8, %xmm2, 24(%rdi)
+; AVX512F-NEXT: LBB57_50: ## %else48
+; AVX512F-NEXT: vpextrb $9, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_52
+; AVX512F-NEXT: ## BB#51: ## %cond.store49
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpextrb $9, %xmm2, 25(%rdi)
+; AVX512F-NEXT: LBB57_52: ## %else50
+; AVX512F-NEXT: vpextrb $10, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_54
+; AVX512F-NEXT: ## BB#53: ## %cond.store51
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpextrb $10, %xmm2, 26(%rdi)
+; AVX512F-NEXT: LBB57_54: ## %else52
+; AVX512F-NEXT: vpextrb $11, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_56
+; AVX512F-NEXT: ## BB#55: ## %cond.store53
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpextrb $11, %xmm2, 27(%rdi)
+; AVX512F-NEXT: LBB57_56: ## %else54
+; AVX512F-NEXT: vpextrb $12, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_58
+; AVX512F-NEXT: ## BB#57: ## %cond.store55
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpextrb $12, %xmm2, 28(%rdi)
+; AVX512F-NEXT: LBB57_58: ## %else56
+; AVX512F-NEXT: vpextrb $13, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_60
+; AVX512F-NEXT: ## BB#59: ## %cond.store57
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpextrb $13, %xmm2, 29(%rdi)
+; AVX512F-NEXT: LBB57_60: ## %else58
+; AVX512F-NEXT: vpextrb $14, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_62
+; AVX512F-NEXT: ## BB#61: ## %cond.store59
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX512F-NEXT: vpextrb $14, %xmm2, 30(%rdi)
+; AVX512F-NEXT: LBB57_62: ## %else60
+; AVX512F-NEXT: vpextrb $15, %xmm0, %eax
+; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: je LBB57_64
+; AVX512F-NEXT: ## BB#63: ## %cond.store61
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX512F-NEXT: vpextrb $15, %xmm0, 31(%rdi)
+; AVX512F-NEXT: LBB57_64: ## %else62
+; AVX512F-NEXT: retq
+;
; SKX-LABEL: test_mask_store_32xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
@@ -2319,6 +7817,1398 @@ define void @test_mask_store_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x i8>
declare void @llvm.masked.store.v32i8.p0v32i8(<32 x i8>, <32 x i8>*, i32, <32 x i1>)
define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8> %val) {
+; AVX1-LABEL: test_mask_store_64xi8:
+; AVX1: ## BB#0:
+; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX1-NEXT: testb $1, %dil
+; AVX1-NEXT: je LBB58_2
+; AVX1-NEXT: ## BB#1: ## %cond.store
+; AVX1-NEXT: vpextrb $0, %xmm0, (%rax)
+; AVX1-NEXT: LBB58_2: ## %else
+; AVX1-NEXT: testb $1, %sil
+; AVX1-NEXT: je LBB58_4
+; AVX1-NEXT: ## BB#3: ## %cond.store1
+; AVX1-NEXT: vpextrb $1, %xmm0, 1(%rax)
+; AVX1-NEXT: LBB58_4: ## %else2
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_6
+; AVX1-NEXT: ## BB#5: ## %cond.store3
+; AVX1-NEXT: vpextrb $2, %xmm0, 2(%rax)
+; AVX1-NEXT: LBB58_6: ##
%else4 +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB58_8 +; AVX1-NEXT: ## BB#7: ## %cond.store5 +; AVX1-NEXT: vpextrb $3, %xmm0, 3(%rax) +; AVX1-NEXT: LBB58_8: ## %else6 +; AVX1-NEXT: testb $1, %r8b +; AVX1-NEXT: je LBB58_10 +; AVX1-NEXT: ## BB#9: ## %cond.store7 +; AVX1-NEXT: vpextrb $4, %xmm0, 4(%rax) +; AVX1-NEXT: LBB58_10: ## %else8 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX1-NEXT: testb $1, %r9b +; AVX1-NEXT: je LBB58_12 +; AVX1-NEXT: ## BB#11: ## %cond.store9 +; AVX1-NEXT: vpextrb $5, %xmm0, 5(%rax) +; AVX1-NEXT: LBB58_12: ## %else10 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB58_14 +; AVX1-NEXT: ## BB#13: ## %cond.store11 +; AVX1-NEXT: vpextrb $6, %xmm0, 6(%rax) +; AVX1-NEXT: LBB58_14: ## %else12 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX1-NEXT: testb $1, %dl +; AVX1-NEXT: je LBB58_16 +; AVX1-NEXT: ## BB#15: ## %cond.store13 +; AVX1-NEXT: vpextrb $7, %xmm0, 7(%rax) +; AVX1-NEXT: LBB58_16: ## %else14 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB58_18 +; AVX1-NEXT: ## BB#17: ## %cond.store15 +; AVX1-NEXT: vpextrb $8, %xmm0, 8(%rax) +; AVX1-NEXT: LBB58_18: ## %else16 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX1-NEXT: testb $1, %dl +; AVX1-NEXT: je LBB58_20 +; AVX1-NEXT: ## BB#19: ## %cond.store17 +; AVX1-NEXT: vpextrb $9, %xmm0, 9(%rax) +; AVX1-NEXT: LBB58_20: ## %else18 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB58_22 +; AVX1-NEXT: ## BB#21: ## %cond.store19 +; AVX1-NEXT: vpextrb $10, %xmm0, 10(%rax) +; AVX1-NEXT: LBB58_22: ## %else20 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX1-NEXT: testb $1, %dl +; AVX1-NEXT: je LBB58_24 +; AVX1-NEXT: ## BB#23: ## %cond.store21 +; AVX1-NEXT: vpextrb $11, %xmm0, 11(%rax) +; AVX1-NEXT: LBB58_24: ## %else22 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB58_26 +; AVX1-NEXT: ## BB#25: ## %cond.store23 +; AVX1-NEXT: vpextrb $12, %xmm0, 12(%rax) +; AVX1-NEXT: LBB58_26: ## %else24 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX1-NEXT: testb $1, %dl +; AVX1-NEXT: je LBB58_28 +; AVX1-NEXT: ## BB#27: ## %cond.store25 +; AVX1-NEXT: vpextrb $13, %xmm0, 13(%rax) +; AVX1-NEXT: LBB58_28: ## %else26 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB58_30 +; AVX1-NEXT: ## BB#29: ## %cond.store27 +; AVX1-NEXT: vpextrb $14, %xmm0, 14(%rax) +; AVX1-NEXT: LBB58_30: ## %else28 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX1-NEXT: testb $1, %dl +; AVX1-NEXT: je LBB58_32 +; AVX1-NEXT: ## BB#31: ## %cond.store29 +; AVX1-NEXT: vpextrb $15, %xmm0, 15(%rax) +; AVX1-NEXT: LBB58_32: ## %else30 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB58_34 +; AVX1-NEXT: ## BB#33: ## %cond.store31 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpextrb $0, %xmm2, 16(%rax) +; AVX1-NEXT: LBB58_34: ## %else32 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX1-NEXT: testb $1, %dl +; AVX1-NEXT: je LBB58_36 +; AVX1-NEXT: ## BB#35: ## %cond.store33 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpextrb $1, %xmm2, 17(%rax) +; AVX1-NEXT: LBB58_36: ## %else34 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; AVX1-NEXT: testb $1, %cl +; AVX1-NEXT: je LBB58_38 +; AVX1-NEXT: ## BB#37: ## %cond.store35 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpextrb $2, %xmm2, 18(%rax) +; AVX1-NEXT: LBB58_38: ## %else36 +; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; AVX1-NEXT: testb $1, %dl +; AVX1-NEXT: je LBB58_40 +; 
+; AVX1-NEXT: ## BB#39: ## %cond.store37
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $3, %xmm2, 19(%rax)
+; AVX1-NEXT: LBB58_40: ## %else38
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_42
+; AVX1-NEXT: ## BB#41: ## %cond.store39
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $4, %xmm2, 20(%rax)
+; AVX1-NEXT: LBB58_42: ## %else40
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_44
+; AVX1-NEXT: ## BB#43: ## %cond.store41
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $5, %xmm2, 21(%rax)
+; AVX1-NEXT: LBB58_44: ## %else42
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_46
+; AVX1-NEXT: ## BB#45: ## %cond.store43
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $6, %xmm2, 22(%rax)
+; AVX1-NEXT: LBB58_46: ## %else44
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_48
+; AVX1-NEXT: ## BB#47: ## %cond.store45
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $7, %xmm2, 23(%rax)
+; AVX1-NEXT: LBB58_48: ## %else46
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_50
+; AVX1-NEXT: ## BB#49: ## %cond.store47
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $8, %xmm2, 24(%rax)
+; AVX1-NEXT: LBB58_50: ## %else48
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_52
+; AVX1-NEXT: ## BB#51: ## %cond.store49
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $9, %xmm2, 25(%rax)
+; AVX1-NEXT: LBB58_52: ## %else50
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_54
+; AVX1-NEXT: ## BB#53: ## %cond.store51
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $10, %xmm2, 26(%rax)
+; AVX1-NEXT: LBB58_54: ## %else52
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_56
+; AVX1-NEXT: ## BB#55: ## %cond.store53
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $11, %xmm2, 27(%rax)
+; AVX1-NEXT: LBB58_56: ## %else54
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_58
+; AVX1-NEXT: ## BB#57: ## %cond.store55
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $12, %xmm2, 28(%rax)
+; AVX1-NEXT: LBB58_58: ## %else56
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_60
+; AVX1-NEXT: ## BB#59: ## %cond.store57
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $13, %xmm2, 29(%rax)
+; AVX1-NEXT: LBB58_60: ## %else58
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_62
+; AVX1-NEXT: ## BB#61: ## %cond.store59
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpextrb $14, %xmm2, 30(%rax)
+; AVX1-NEXT: LBB58_62: ## %else60
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_64
+; AVX1-NEXT: ## BB#63: ## %cond.store61
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT: vpextrb $15, %xmm0, 31(%rax)
+; AVX1-NEXT: LBB58_64: ## %else62
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_66
+; AVX1-NEXT: ## BB#65: ## %cond.store63
+; AVX1-NEXT: vpextrb $0, %xmm1, 32(%rax)
+; AVX1-NEXT: LBB58_66: ## %else64
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_68
+; AVX1-NEXT: ## BB#67: ## %cond.store65
+; AVX1-NEXT: vpextrb $1, %xmm1, 33(%rax)
+; AVX1-NEXT: LBB58_68: ## %else66
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_70
+; AVX1-NEXT: ## BB#69: ## %cond.store67
+; AVX1-NEXT: vpextrb $2, %xmm1, 34(%rax)
+; AVX1-NEXT: LBB58_70: ## %else68
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_72
+; AVX1-NEXT: ## BB#71: ## %cond.store69
+; AVX1-NEXT: vpextrb $3, %xmm1, 35(%rax)
+; AVX1-NEXT: LBB58_72: ## %else70
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_74
+; AVX1-NEXT: ## BB#73: ## %cond.store71
+; AVX1-NEXT: vpextrb $4, %xmm1, 36(%rax)
+; AVX1-NEXT: LBB58_74: ## %else72
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_76
+; AVX1-NEXT: ## BB#75: ## %cond.store73
+; AVX1-NEXT: vpextrb $5, %xmm1, 37(%rax)
+; AVX1-NEXT: LBB58_76: ## %else74
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_78
+; AVX1-NEXT: ## BB#77: ## %cond.store75
+; AVX1-NEXT: vpextrb $6, %xmm1, 38(%rax)
+; AVX1-NEXT: LBB58_78: ## %else76
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_80
+; AVX1-NEXT: ## BB#79: ## %cond.store77
+; AVX1-NEXT: vpextrb $7, %xmm1, 39(%rax)
+; AVX1-NEXT: LBB58_80: ## %else78
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_82
+; AVX1-NEXT: ## BB#81: ## %cond.store79
+; AVX1-NEXT: vpextrb $8, %xmm1, 40(%rax)
+; AVX1-NEXT: LBB58_82: ## %else80
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_84
+; AVX1-NEXT: ## BB#83: ## %cond.store81
+; AVX1-NEXT: vpextrb $9, %xmm1, 41(%rax)
+; AVX1-NEXT: LBB58_84: ## %else82
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_86
+; AVX1-NEXT: ## BB#85: ## %cond.store83
+; AVX1-NEXT: vpextrb $10, %xmm1, 42(%rax)
+; AVX1-NEXT: LBB58_86: ## %else84
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_88
+; AVX1-NEXT: ## BB#87: ## %cond.store85
+; AVX1-NEXT: vpextrb $11, %xmm1, 43(%rax)
+; AVX1-NEXT: LBB58_88: ## %else86
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_90
+; AVX1-NEXT: ## BB#89: ## %cond.store87
+; AVX1-NEXT: vpextrb $12, %xmm1, 44(%rax)
+; AVX1-NEXT: LBB58_90: ## %else88
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_92
+; AVX1-NEXT: ## BB#91: ## %cond.store89
+; AVX1-NEXT: vpextrb $13, %xmm1, 45(%rax)
+; AVX1-NEXT: LBB58_92: ## %else90
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_94
+; AVX1-NEXT: ## BB#93: ## %cond.store91
+; AVX1-NEXT: vpextrb $14, %xmm1, 46(%rax)
+; AVX1-NEXT: LBB58_94: ## %else92
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_96
+; AVX1-NEXT: ## BB#95: ## %cond.store93
+; AVX1-NEXT: vpextrb $15, %xmm1, 47(%rax)
+; AVX1-NEXT: LBB58_96: ## %else94
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_98
+; AVX1-NEXT: ## BB#97: ## %cond.store95
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $0, %xmm0, 48(%rax)
+; AVX1-NEXT: LBB58_98: ## %else96
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_100
+; AVX1-NEXT: ## BB#99: ## %cond.store97
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $1, %xmm0, 49(%rax)
+; AVX1-NEXT: LBB58_100: ## %else98
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_102
+; AVX1-NEXT: ## BB#101: ## %cond.store99
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $2, %xmm0, 50(%rax)
+; AVX1-NEXT: LBB58_102: ## %else100
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_104
+; AVX1-NEXT: ## BB#103: ## %cond.store101
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $3, %xmm0, 51(%rax)
+; AVX1-NEXT: LBB58_104: ## %else102
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_106
+; AVX1-NEXT: ## BB#105: ## %cond.store103
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $4, %xmm0, 52(%rax)
+; AVX1-NEXT: LBB58_106: ## %else104
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_108
+; AVX1-NEXT: ## BB#107: ## %cond.store105
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $5, %xmm0, 53(%rax)
+; AVX1-NEXT: LBB58_108: ## %else106
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_110
+; AVX1-NEXT: ## BB#109: ## %cond.store107
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $6, %xmm0, 54(%rax)
+; AVX1-NEXT: LBB58_110: ## %else108
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_112
+; AVX1-NEXT: ## BB#111: ## %cond.store109
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $7, %xmm0, 55(%rax)
+; AVX1-NEXT: LBB58_112: ## %else110
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_114
+; AVX1-NEXT: ## BB#113: ## %cond.store111
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $8, %xmm0, 56(%rax)
+; AVX1-NEXT: LBB58_114: ## %else112
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_116
+; AVX1-NEXT: ## BB#115: ## %cond.store113
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $9, %xmm0, 57(%rax)
+; AVX1-NEXT: LBB58_116: ## %else114
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_118
+; AVX1-NEXT: ## BB#117: ## %cond.store115
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $10, %xmm0, 58(%rax)
+; AVX1-NEXT: LBB58_118: ## %else116
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_120
+; AVX1-NEXT: ## BB#119: ## %cond.store117
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $11, %xmm0, 59(%rax)
+; AVX1-NEXT: LBB58_120: ## %else118
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_122
+; AVX1-NEXT: ## BB#121: ## %cond.store119
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $12, %xmm0, 60(%rax)
+; AVX1-NEXT: LBB58_122: ## %else120
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_124
+; AVX1-NEXT: ## BB#123: ## %cond.store121
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $13, %xmm0, 61(%rax)
+; AVX1-NEXT: LBB58_124: ## %else122
+; AVX1-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX1-NEXT: testb $1, %cl
+; AVX1-NEXT: je LBB58_126
+; AVX1-NEXT: ## BB#125: ## %cond.store123
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $14, %xmm0, 62(%rax)
+; AVX1-NEXT: LBB58_126: ## %else124
+; AVX1-NEXT: testb $1, %dl
+; AVX1-NEXT: je LBB58_128
+; AVX1-NEXT: ## BB#127: ## %cond.store125
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrb $15, %xmm0, 63(%rax)
+; AVX1-NEXT: LBB58_128: ## %else126
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_mask_store_64xi8:
+; AVX2: ## BB#0:
+; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax
+; AVX2-NEXT: testb $1, %dil
+; AVX2-NEXT: je LBB58_2
+; AVX2-NEXT: ## BB#1: ## %cond.store
+; AVX2-NEXT: vpextrb $0, %xmm0, (%rax)
+; AVX2-NEXT: LBB58_2: ## %else
+; AVX2-NEXT: testb $1, %sil
+; AVX2-NEXT: je LBB58_4
+; AVX2-NEXT: ## BB#3: ## %cond.store1
+; AVX2-NEXT: vpextrb $1, %xmm0, 1(%rax)
+; AVX2-NEXT: LBB58_4: ## %else2
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_6
+; AVX2-NEXT: ## BB#5: ## %cond.store3
+; AVX2-NEXT: vpextrb $2, %xmm0, 2(%rax)
+; AVX2-NEXT: LBB58_6: ## %else4
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_8
+; AVX2-NEXT: ## BB#7: ## %cond.store5
+; AVX2-NEXT: vpextrb $3, %xmm0, 3(%rax)
+; AVX2-NEXT: LBB58_8: ## %else6
+; AVX2-NEXT: testb $1, %r8b
+; AVX2-NEXT: je LBB58_10
+; AVX2-NEXT: ## BB#9: ## %cond.store7
+; AVX2-NEXT: vpextrb $4, %xmm0, 4(%rax)
+; AVX2-NEXT: LBB58_10: ## %else8
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %r9b
+; AVX2-NEXT: je LBB58_12
+; AVX2-NEXT: ## BB#11: ## %cond.store9
+; AVX2-NEXT: vpextrb $5, %xmm0, 5(%rax)
+; AVX2-NEXT: LBB58_12: ## %else10
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_14
+; AVX2-NEXT: ## BB#13: ## %cond.store11
+; AVX2-NEXT: vpextrb $6, %xmm0, 6(%rax)
+; AVX2-NEXT: LBB58_14: ## %else12
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_16
+; AVX2-NEXT: ## BB#15: ## %cond.store13
+; AVX2-NEXT: vpextrb $7, %xmm0, 7(%rax)
+; AVX2-NEXT: LBB58_16: ## %else14
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_18
+; AVX2-NEXT: ## BB#17: ## %cond.store15
+; AVX2-NEXT: vpextrb $8, %xmm0, 8(%rax)
+; AVX2-NEXT: LBB58_18: ## %else16
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_20
+; AVX2-NEXT: ## BB#19: ## %cond.store17
+; AVX2-NEXT: vpextrb $9, %xmm0, 9(%rax)
+; AVX2-NEXT: LBB58_20: ## %else18
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_22
+; AVX2-NEXT: ## BB#21: ## %cond.store19
+; AVX2-NEXT: vpextrb $10, %xmm0, 10(%rax)
+; AVX2-NEXT: LBB58_22: ## %else20
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_24
+; AVX2-NEXT: ## BB#23: ## %cond.store21
+; AVX2-NEXT: vpextrb $11, %xmm0, 11(%rax)
+; AVX2-NEXT: LBB58_24: ## %else22
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_26
+; AVX2-NEXT: ## BB#25: ## %cond.store23
+; AVX2-NEXT: vpextrb $12, %xmm0, 12(%rax)
+; AVX2-NEXT: LBB58_26: ## %else24
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_28
+; AVX2-NEXT: ## BB#27: ## %cond.store25
+; AVX2-NEXT: vpextrb $13, %xmm0, 13(%rax)
+; AVX2-NEXT: LBB58_28: ## %else26
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_30
+; AVX2-NEXT: ## BB#29: ## %cond.store27
+; AVX2-NEXT: vpextrb $14, %xmm0, 14(%rax)
+; AVX2-NEXT: LBB58_30: ## %else28
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_32
+; AVX2-NEXT: ## BB#31: ## %cond.store29
+; AVX2-NEXT: vpextrb $15, %xmm0, 15(%rax)
+; AVX2-NEXT: LBB58_32: ## %else30
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_34
+; AVX2-NEXT: ## BB#33: ## %cond.store31
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $0, %xmm2, 16(%rax)
+; AVX2-NEXT: LBB58_34: ## %else32
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_36
+; AVX2-NEXT: ## BB#35: ## %cond.store33
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $1, %xmm2, 17(%rax)
+; AVX2-NEXT: LBB58_36: ## %else34
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_38
+; AVX2-NEXT: ## BB#37: ## %cond.store35
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $2, %xmm2, 18(%rax)
+; AVX2-NEXT: LBB58_38: ## %else36
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_40
+; AVX2-NEXT: ## BB#39: ## %cond.store37
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $3, %xmm2, 19(%rax)
+; AVX2-NEXT: LBB58_40: ## %else38
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_42
+; AVX2-NEXT: ## BB#41: ## %cond.store39
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $4, %xmm2, 20(%rax)
+; AVX2-NEXT: LBB58_42: ## %else40
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_44
+; AVX2-NEXT: ## BB#43: ## %cond.store41
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $5, %xmm2, 21(%rax)
+; AVX2-NEXT: LBB58_44: ## %else42
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_46
+; AVX2-NEXT: ## BB#45: ## %cond.store43
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $6, %xmm2, 22(%rax)
+; AVX2-NEXT: LBB58_46: ## %else44
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_48
+; AVX2-NEXT: ## BB#47: ## %cond.store45
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $7, %xmm2, 23(%rax)
+; AVX2-NEXT: LBB58_48: ## %else46
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_50
+; AVX2-NEXT: ## BB#49: ## %cond.store47
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $8, %xmm2, 24(%rax)
+; AVX2-NEXT: LBB58_50: ## %else48
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_52
+; AVX2-NEXT: ## BB#51: ## %cond.store49
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $9, %xmm2, 25(%rax)
+; AVX2-NEXT: LBB58_52: ## %else50
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_54
+; AVX2-NEXT: ## BB#53: ## %cond.store51
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $10, %xmm2, 26(%rax)
+; AVX2-NEXT: LBB58_54: ## %else52
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_56
+; AVX2-NEXT: ## BB#55: ## %cond.store53
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $11, %xmm2, 27(%rax)
+; AVX2-NEXT: LBB58_56: ## %else54
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_58
+; AVX2-NEXT: ## BB#57: ## %cond.store55
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $12, %xmm2, 28(%rax)
+; AVX2-NEXT: LBB58_58: ## %else56
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_60
+; AVX2-NEXT: ## BB#59: ## %cond.store57
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $13, %xmm2, 29(%rax)
+; AVX2-NEXT: LBB58_60: ## %else58
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_62
+; AVX2-NEXT: ## BB#61: ## %cond.store59
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpextrb $14, %xmm2, 30(%rax)
+; AVX2-NEXT: LBB58_62: ## %else60
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_64
+; AVX2-NEXT: ## BB#63: ## %cond.store61
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT: vpextrb $15, %xmm0, 31(%rax)
+; AVX2-NEXT: LBB58_64: ## %else62
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_66
+; AVX2-NEXT: ## BB#65: ## %cond.store63
+; AVX2-NEXT: vpextrb $0, %xmm1, 32(%rax)
+; AVX2-NEXT: LBB58_66: ## %else64
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_68
+; AVX2-NEXT: ## BB#67: ## %cond.store65
+; AVX2-NEXT: vpextrb $1, %xmm1, 33(%rax)
+; AVX2-NEXT: LBB58_68: ## %else66
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_70
+; AVX2-NEXT: ## BB#69: ## %cond.store67
+; AVX2-NEXT: vpextrb $2, %xmm1, 34(%rax)
+; AVX2-NEXT: LBB58_70: ## %else68
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_72
+; AVX2-NEXT: ## BB#71: ## %cond.store69
+; AVX2-NEXT: vpextrb $3, %xmm1, 35(%rax)
+; AVX2-NEXT: LBB58_72: ## %else70
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_74
+; AVX2-NEXT: ## BB#73: ## %cond.store71
+; AVX2-NEXT: vpextrb $4, %xmm1, 36(%rax)
+; AVX2-NEXT: LBB58_74: ## %else72
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_76
+; AVX2-NEXT: ## BB#75: ## %cond.store73
+; AVX2-NEXT: vpextrb $5, %xmm1, 37(%rax)
+; AVX2-NEXT: LBB58_76: ## %else74
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_78
+; AVX2-NEXT: ## BB#77: ## %cond.store75
+; AVX2-NEXT: vpextrb $6, %xmm1, 38(%rax)
+; AVX2-NEXT: LBB58_78: ## %else76
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_80
+; AVX2-NEXT: ## BB#79: ## %cond.store77
+; AVX2-NEXT: vpextrb $7, %xmm1, 39(%rax)
+; AVX2-NEXT: LBB58_80: ## %else78
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_82
+; AVX2-NEXT: ## BB#81: ## %cond.store79
+; AVX2-NEXT: vpextrb $8, %xmm1, 40(%rax)
+; AVX2-NEXT: LBB58_82: ## %else80
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_84
+; AVX2-NEXT: ## BB#83: ## %cond.store81
+; AVX2-NEXT: vpextrb $9, %xmm1, 41(%rax)
+; AVX2-NEXT: LBB58_84: ## %else82
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_86
+; AVX2-NEXT: ## BB#85: ## %cond.store83
+; AVX2-NEXT: vpextrb $10, %xmm1, 42(%rax)
+; AVX2-NEXT: LBB58_86: ## %else84
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_88
+; AVX2-NEXT: ## BB#87: ## %cond.store85
+; AVX2-NEXT: vpextrb $11, %xmm1, 43(%rax)
+; AVX2-NEXT: LBB58_88: ## %else86
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_90
+; AVX2-NEXT: ## BB#89: ## %cond.store87
+; AVX2-NEXT: vpextrb $12, %xmm1, 44(%rax)
+; AVX2-NEXT: LBB58_90: ## %else88
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_92
+; AVX2-NEXT: ## BB#91: ## %cond.store89
+; AVX2-NEXT: vpextrb $13, %xmm1, 45(%rax)
+; AVX2-NEXT: LBB58_92: ## %else90
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_94
+; AVX2-NEXT: ## BB#93: ## %cond.store91
+; AVX2-NEXT: vpextrb $14, %xmm1, 46(%rax)
+; AVX2-NEXT: LBB58_94: ## %else92
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_96
+; AVX2-NEXT: ## BB#95: ## %cond.store93
+; AVX2-NEXT: vpextrb $15, %xmm1, 47(%rax)
+; AVX2-NEXT: LBB58_96: ## %else94
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_98
+; AVX2-NEXT: ## BB#97: ## %cond.store95
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $0, %xmm0, 48(%rax)
+; AVX2-NEXT: LBB58_98: ## %else96
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_100
+; AVX2-NEXT: ## BB#99: ## %cond.store97
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $1, %xmm0, 49(%rax)
+; AVX2-NEXT: LBB58_100: ## %else98
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_102
+; AVX2-NEXT: ## BB#101: ## %cond.store99
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $2, %xmm0, 50(%rax)
+; AVX2-NEXT: LBB58_102: ## %else100
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_104
+; AVX2-NEXT: ## BB#103: ## %cond.store101
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $3, %xmm0, 51(%rax)
+; AVX2-NEXT: LBB58_104: ## %else102
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_106
+; AVX2-NEXT: ## BB#105: ## %cond.store103
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $4, %xmm0, 52(%rax)
+; AVX2-NEXT: LBB58_106: ## %else104
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_108
+; AVX2-NEXT: ## BB#107: ## %cond.store105
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $5, %xmm0, 53(%rax)
+; AVX2-NEXT: LBB58_108: ## %else106
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_110
+; AVX2-NEXT: ## BB#109: ## %cond.store107
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $6, %xmm0, 54(%rax)
+; AVX2-NEXT: LBB58_110: ## %else108
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_112
+; AVX2-NEXT: ## BB#111: ## %cond.store109
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $7, %xmm0, 55(%rax)
+; AVX2-NEXT: LBB58_112: ## %else110
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_114
+; AVX2-NEXT: ## BB#113: ## %cond.store111
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $8, %xmm0, 56(%rax)
+; AVX2-NEXT: LBB58_114: ## %else112
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_116
+; AVX2-NEXT: ## BB#115: ## %cond.store113
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $9, %xmm0, 57(%rax)
+; AVX2-NEXT: LBB58_116: ## %else114
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_118
+; AVX2-NEXT: ## BB#117: ## %cond.store115
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $10, %xmm0, 58(%rax)
+; AVX2-NEXT: LBB58_118: ## %else116
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_120
+; AVX2-NEXT: ## BB#119: ## %cond.store117
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $11, %xmm0, 59(%rax)
+; AVX2-NEXT: LBB58_120: ## %else118
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_122
+; AVX2-NEXT: ## BB#121: ## %cond.store119
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $12, %xmm0, 60(%rax)
+; AVX2-NEXT: LBB58_122: ## %else120
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %cl
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_124
+; AVX2-NEXT: ## BB#123: ## %cond.store121
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $13, %xmm0, 61(%rax)
+; AVX2-NEXT: LBB58_124: ## %else122
+; AVX2-NEXT: movb {{[0-9]+}}(%rsp), %dl
+; AVX2-NEXT: testb $1, %cl
+; AVX2-NEXT: je LBB58_126
+; AVX2-NEXT: ## BB#125: ## %cond.store123
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $14, %xmm0, 62(%rax)
+; AVX2-NEXT: LBB58_126: ## %else124
+; AVX2-NEXT: testb $1, %dl
+; AVX2-NEXT: je LBB58_128
+; AVX2-NEXT: ## BB#127: ## %cond.store125
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrb $15, %xmm0, 63(%rax)
+; AVX2-NEXT: LBB58_128: ## %else126
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_mask_store_64xi8:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kshiftlw $15, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_2
+; AVX512F-NEXT: ## BB#1: ## %cond.store
+; AVX512F-NEXT: vpextrb $0, %xmm4, (%rdi)
+; AVX512F-NEXT: LBB58_2: ## %else
+; AVX512F-NEXT: kshiftlw $14, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_4
+; AVX512F-NEXT: ## BB#3: ## %cond.store1
+; AVX512F-NEXT: vpextrb $1, %xmm4, 1(%rdi)
+; AVX512F-NEXT: LBB58_4: ## %else2
+; AVX512F-NEXT: kshiftlw $13, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_6
+; AVX512F-NEXT: ## BB#5: ## %cond.store3
+; AVX512F-NEXT: vpextrb $2, %xmm4, 2(%rdi)
+; AVX512F-NEXT: LBB58_6: ## %else4
+; AVX512F-NEXT: kshiftlw $12, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_8
+; AVX512F-NEXT: ## BB#7: ## %cond.store5
+; AVX512F-NEXT: vpextrb $3, %xmm4, 3(%rdi)
+; AVX512F-NEXT: LBB58_8: ## %else6
+; AVX512F-NEXT: kshiftlw $11, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_10
+; AVX512F-NEXT: ## BB#9: ## %cond.store7
+; AVX512F-NEXT: vpextrb $4, %xmm4, 4(%rdi)
+; AVX512F-NEXT: LBB58_10: ## %else8
+; AVX512F-NEXT: kshiftlw $10, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_12
+; AVX512F-NEXT: ## BB#11: ## %cond.store9
+; AVX512F-NEXT: vpextrb $5, %xmm4, 5(%rdi)
+; AVX512F-NEXT: LBB58_12: ## %else10
+; AVX512F-NEXT: kshiftlw $9, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_14
+; AVX512F-NEXT: ## BB#13: ## %cond.store11
+; AVX512F-NEXT: vpextrb $6, %xmm4, 6(%rdi)
+; AVX512F-NEXT: LBB58_14: ## %else12
+; AVX512F-NEXT: kshiftlw $8, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_16
+; AVX512F-NEXT: ## BB#15: ## %cond.store13
+; AVX512F-NEXT: vpextrb $7, %xmm4, 7(%rdi)
+; AVX512F-NEXT: LBB58_16: ## %else14
+; AVX512F-NEXT: kshiftlw $7, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_18
+; AVX512F-NEXT: ## BB#17: ## %cond.store15
+; AVX512F-NEXT: vpextrb $8, %xmm4, 8(%rdi)
+; AVX512F-NEXT: LBB58_18: ## %else16
+; AVX512F-NEXT: kshiftlw $6, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_20
+; AVX512F-NEXT: ## BB#19: ## %cond.store17
+; AVX512F-NEXT: vpextrb $9, %xmm4, 9(%rdi)
+; AVX512F-NEXT: LBB58_20: ## %else18
+; AVX512F-NEXT: kshiftlw $5, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_22
+; AVX512F-NEXT: ## BB#21: ## %cond.store19
+; AVX512F-NEXT: vpextrb $10, %xmm4, 10(%rdi)
+; AVX512F-NEXT: LBB58_22: ## %else20
+; AVX512F-NEXT: kshiftlw $4, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_24
+; AVX512F-NEXT: ## BB#23: ## %cond.store21
+; AVX512F-NEXT: vpextrb $11, %xmm4, 11(%rdi)
+; AVX512F-NEXT: LBB58_24: ## %else22
+; AVX512F-NEXT: kshiftlw $3, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_26
+; AVX512F-NEXT: ## BB#25: ## %cond.store23
+; AVX512F-NEXT: vpextrb $12, %xmm4, 12(%rdi)
+; AVX512F-NEXT: LBB58_26: ## %else24
+; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
+; AVX512F-NEXT: kshiftlw $2, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_28
+; AVX512F-NEXT: ## BB#27: ## %cond.store25
+; AVX512F-NEXT: vpextrb $13, %xmm4, 13(%rdi)
+; AVX512F-NEXT: LBB58_28: ## %else26
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: kshiftlw $1, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_30
+; AVX512F-NEXT: ## BB#29: ## %cond.store27
+; AVX512F-NEXT: vpextrb $14, %xmm4, 14(%rdi)
+; AVX512F-NEXT: LBB58_30: ## %else28
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: kshiftlw $0, %k0, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_32
+; AVX512F-NEXT: ## BB#31: ## %cond.store29
+; AVX512F-NEXT: vpextrb $15, %xmm4, 15(%rdi)
+; AVX512F-NEXT: LBB58_32: ## %else30
+; AVX512F-NEXT: kshiftlw $15, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_34
+; AVX512F-NEXT: ## BB#33: ## %cond.store31
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $0, %xmm0, 16(%rdi)
+; AVX512F-NEXT: LBB58_34: ## %else32
+; AVX512F-NEXT: kshiftlw $14, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_36
+; AVX512F-NEXT: ## BB#35: ## %cond.store33
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $1, %xmm0, 17(%rdi)
+; AVX512F-NEXT: LBB58_36: ## %else34
+; AVX512F-NEXT: kshiftlw $13, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_38
+; AVX512F-NEXT: ## BB#37: ## %cond.store35
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $2, %xmm0, 18(%rdi)
+; AVX512F-NEXT: LBB58_38: ## %else36
+; AVX512F-NEXT: kshiftlw $12, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_40
+; AVX512F-NEXT: ## BB#39: ## %cond.store37
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $3, %xmm0, 19(%rdi)
+; AVX512F-NEXT: LBB58_40: ## %else38
+; AVX512F-NEXT: kshiftlw $11, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_42
+; AVX512F-NEXT: ## BB#41: ## %cond.store39
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $4, %xmm0, 20(%rdi)
+; AVX512F-NEXT: LBB58_42: ## %else40
+; AVX512F-NEXT: kshiftlw $10, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_44
+; AVX512F-NEXT: ## BB#43: ## %cond.store41
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $5, %xmm0, 21(%rdi)
+; AVX512F-NEXT: LBB58_44: ## %else42
+; AVX512F-NEXT: kshiftlw $9, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_46
+; AVX512F-NEXT: ## BB#45: ## %cond.store43
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $6, %xmm0, 22(%rdi)
+; AVX512F-NEXT: LBB58_46: ## %else44
+; AVX512F-NEXT: kshiftlw $8, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_48
+; AVX512F-NEXT: ## BB#47: ## %cond.store45
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $7, %xmm0, 23(%rdi)
+; AVX512F-NEXT: LBB58_48: ## %else46
+; AVX512F-NEXT: kshiftlw $7, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_50
+; AVX512F-NEXT: ## BB#49: ## %cond.store47
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $8, %xmm0, 24(%rdi)
+; AVX512F-NEXT: LBB58_50: ## %else48
+; AVX512F-NEXT: kshiftlw $6, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_52
+; AVX512F-NEXT: ## BB#51: ## %cond.store49
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $9, %xmm0, 25(%rdi)
+; AVX512F-NEXT: LBB58_52: ## %else50
+; AVX512F-NEXT: kshiftlw $5, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_54
+; AVX512F-NEXT: ## BB#53: ## %cond.store51
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $10, %xmm0, 26(%rdi)
+; AVX512F-NEXT: LBB58_54: ## %else52
+; AVX512F-NEXT: kshiftlw $4, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_56
+; AVX512F-NEXT: ## BB#55: ## %cond.store53
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $11, %xmm0, 27(%rdi)
+; AVX512F-NEXT: LBB58_56: ## %else54
+; AVX512F-NEXT: kshiftlw $3, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_58
+; AVX512F-NEXT: ## BB#57: ## %cond.store55
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $12, %xmm0, 28(%rdi)
+; AVX512F-NEXT: LBB58_58: ## %else56
+; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm0
+; AVX512F-NEXT: kshiftlw $2, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_60
+; AVX512F-NEXT: ## BB#59: ## %cond.store57
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm1
+; AVX512F-NEXT: vpextrb $13, %xmm1, 29(%rdi)
+; AVX512F-NEXT: LBB58_60: ## %else58
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: kshiftlw $1, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_62
+; AVX512F-NEXT: ## BB#61: ## %cond.store59
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm1
+; AVX512F-NEXT: vpextrb $14, %xmm1, 30(%rdi)
+; AVX512F-NEXT: LBB58_62: ## %else60
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kshiftlw $0, %k1, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_64
+; AVX512F-NEXT: ## BB#63: ## %cond.store61
+; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm0
+; AVX512F-NEXT: vpextrb $15, %xmm0, 31(%rdi)
+; AVX512F-NEXT: LBB58_64: ## %else62
+; AVX512F-NEXT: kshiftlw $15, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_66
+; AVX512F-NEXT: ## BB#65: ## %cond.store63
+; AVX512F-NEXT: vpextrb $0, %xmm5, 32(%rdi)
+; AVX512F-NEXT: LBB58_66: ## %else64
+; AVX512F-NEXT: kshiftlw $14, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_68
+; AVX512F-NEXT: ## BB#67: ## %cond.store65
+; AVX512F-NEXT: vpextrb $1, %xmm5, 33(%rdi)
+; AVX512F-NEXT: LBB58_68: ## %else66
+; AVX512F-NEXT: kshiftlw $13, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_70
+; AVX512F-NEXT: ## BB#69: ## %cond.store67
+; AVX512F-NEXT: vpextrb $2, %xmm5, 34(%rdi)
+; AVX512F-NEXT: LBB58_70: ## %else68
+; AVX512F-NEXT: kshiftlw $12, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_72
+; AVX512F-NEXT: ## BB#71: ## %cond.store69
+; AVX512F-NEXT: vpextrb $3, %xmm5, 35(%rdi)
+; AVX512F-NEXT: LBB58_72: ## %else70
+; AVX512F-NEXT: kshiftlw $11, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_74
+; AVX512F-NEXT: ## BB#73: ## %cond.store71
+; AVX512F-NEXT: vpextrb $4, %xmm5, 36(%rdi)
+; AVX512F-NEXT: LBB58_74: ## %else72
+; AVX512F-NEXT: kshiftlw $10, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_76
+; AVX512F-NEXT: ## BB#75: ## %cond.store73
+; AVX512F-NEXT: vpextrb $5, %xmm5, 37(%rdi)
+; AVX512F-NEXT: LBB58_76: ## %else74
+; AVX512F-NEXT: kshiftlw $9, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_78
+; AVX512F-NEXT: ## BB#77: ## %cond.store75
+; AVX512F-NEXT: vpextrb $6, %xmm5, 38(%rdi)
+; AVX512F-NEXT: LBB58_78: ## %else76
+; AVX512F-NEXT: kshiftlw $8, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_80
+; AVX512F-NEXT: ## BB#79: ## %cond.store77
+; AVX512F-NEXT: vpextrb $7, %xmm5, 39(%rdi)
+; AVX512F-NEXT: LBB58_80: ## %else78
+; AVX512F-NEXT: kshiftlw $7, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_82
+; AVX512F-NEXT: ## BB#81: ## %cond.store79
+; AVX512F-NEXT: vpextrb $8, %xmm5, 40(%rdi)
+; AVX512F-NEXT: LBB58_82: ## %else80
+; AVX512F-NEXT: kshiftlw $6, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_84
+; AVX512F-NEXT: ## BB#83: ## %cond.store81
+; AVX512F-NEXT: vpextrb $9, %xmm5, 41(%rdi)
+; AVX512F-NEXT: LBB58_84: ## %else82
+; AVX512F-NEXT: kshiftlw $5, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_86
+; AVX512F-NEXT: ## BB#85: ## %cond.store83
+; AVX512F-NEXT: vpextrb $10, %xmm5, 42(%rdi)
+; AVX512F-NEXT: LBB58_86: ## %else84
+; AVX512F-NEXT: kshiftlw $4, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_88
+; AVX512F-NEXT: ## BB#87: ## %cond.store85
+; AVX512F-NEXT: vpextrb $11, %xmm5, 43(%rdi)
+; AVX512F-NEXT: LBB58_88: ## %else86
+; AVX512F-NEXT: kshiftlw $3, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_90
+; AVX512F-NEXT: ## BB#89: ## %cond.store87
+; AVX512F-NEXT: vpextrb $12, %xmm5, 44(%rdi)
+; AVX512F-NEXT: LBB58_90: ## %else88
+; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm0
+; AVX512F-NEXT: kshiftlw $2, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_92
+; AVX512F-NEXT: ## BB#91: ## %cond.store89
+; AVX512F-NEXT: vpextrb $13, %xmm5, 45(%rdi)
+; AVX512F-NEXT: LBB58_92: ## %else90
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: kshiftlw $1, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_94
+; AVX512F-NEXT: ## BB#93: ## %cond.store91
+; AVX512F-NEXT: vpextrb $14, %xmm5, 46(%rdi)
+; AVX512F-NEXT: LBB58_94: ## %else92
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: kshiftlw $0, %k0, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_96
+; AVX512F-NEXT: ## BB#95: ## %cond.store93
+; AVX512F-NEXT: vpextrb $15, %xmm5, 47(%rdi)
+; AVX512F-NEXT: LBB58_96: ## %else94
+; AVX512F-NEXT: kshiftlw $15, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_98
+; AVX512F-NEXT: ## BB#97: ## %cond.store95
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $0, %xmm0, 48(%rdi)
+; AVX512F-NEXT: LBB58_98: ## %else96
+; AVX512F-NEXT: kshiftlw $14, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_100
+; AVX512F-NEXT: ## BB#99: ## %cond.store97
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $1, %xmm0, 49(%rdi)
+; AVX512F-NEXT: LBB58_100: ## %else98
+; AVX512F-NEXT: kshiftlw $13, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_102
+; AVX512F-NEXT: ## BB#101: ## %cond.store99
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $2, %xmm0, 50(%rdi)
+; AVX512F-NEXT: LBB58_102: ## %else100
+; AVX512F-NEXT: kshiftlw $12, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_104
+; AVX512F-NEXT: ## BB#103: ## %cond.store101
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $3, %xmm0, 51(%rdi)
+; AVX512F-NEXT: LBB58_104: ## %else102
+; AVX512F-NEXT: kshiftlw $11, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_106
+; AVX512F-NEXT: ## BB#105: ## %cond.store103
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $4, %xmm0, 52(%rdi)
+; AVX512F-NEXT: LBB58_106: ## %else104
+; AVX512F-NEXT: kshiftlw $10, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_108
+; AVX512F-NEXT: ## BB#107: ## %cond.store105
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $5, %xmm0, 53(%rdi)
+; AVX512F-NEXT: LBB58_108: ## %else106
+; AVX512F-NEXT: kshiftlw $9, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_110
+; AVX512F-NEXT: ## BB#109: ## %cond.store107
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $6, %xmm0, 54(%rdi)
+; AVX512F-NEXT: LBB58_110: ## %else108
+; AVX512F-NEXT: kshiftlw $8, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_112
+; AVX512F-NEXT: ## BB#111: ## %cond.store109
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $7, %xmm0, 55(%rdi)
+; AVX512F-NEXT: LBB58_112: ## %else110
+; AVX512F-NEXT: kshiftlw $7, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_114
+; AVX512F-NEXT: ## BB#113: ## %cond.store111
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $8, %xmm0, 56(%rdi)
+; AVX512F-NEXT: LBB58_114: ## %else112
+; AVX512F-NEXT: kshiftlw $6, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_116
+; AVX512F-NEXT: ## BB#115: ## %cond.store113
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $9, %xmm0, 57(%rdi)
+; AVX512F-NEXT: LBB58_116: ## %else114
+; AVX512F-NEXT: kshiftlw $5, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_118
+; AVX512F-NEXT: ## BB#117: ## %cond.store115
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $10, %xmm0, 58(%rdi)
+; AVX512F-NEXT: LBB58_118: ## %else116
+; AVX512F-NEXT: kshiftlw $4, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_120
+; AVX512F-NEXT: ## BB#119: ## %cond.store117
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $11, %xmm0, 59(%rdi)
+; AVX512F-NEXT: LBB58_120: ## %else118
+; AVX512F-NEXT: kshiftlw $3, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_122
+; AVX512F-NEXT: ## BB#121: ## %cond.store119
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $12, %xmm0, 60(%rdi)
+; AVX512F-NEXT: LBB58_122: ## %else120
+; AVX512F-NEXT: kshiftlw $2, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_124
+; AVX512F-NEXT: ## BB#123: ## %cond.store121
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $13, %xmm0, 61(%rdi)
+; AVX512F-NEXT: LBB58_124: ## %else122
+; AVX512F-NEXT: kshiftlw $1, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_126
+; AVX512F-NEXT: ## BB#125: ## %cond.store123
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $14, %xmm0, 62(%rdi)
+; AVX512F-NEXT: LBB58_126: ## %else124
+; AVX512F-NEXT: kshiftlw $0, %k1, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB58_128
+; AVX512F-NEXT: ## BB#127: ## %cond.store125
+; AVX512F-NEXT: vextracti128 $1, %ymm5, %xmm0
+; AVX512F-NEXT: vpextrb $15, %xmm0, 63(%rdi)
+; AVX512F-NEXT: LBB58_128: ## %else126
+; AVX512F-NEXT: retq
+;
; SKX-LABEL: test_mask_store_64xi8:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %zmm0, %zmm0
@@ -2331,6 +9221,131 @@ define void @test_mask_store_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x i8>
declare void @llvm.masked.store.v64i8.p0v64i8(<64 x i8>, <64 x i8>*, i32, <64 x i1>)

define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %val) {
+; AVX-LABEL: test_mask_store_8xi16:
+; AVX: ## BB#0:
+; AVX-NEXT: vpextrb $0, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB59_2
+; AVX-NEXT: ## BB#1: ## %cond.store
+; AVX-NEXT: vmovd %xmm1, %eax
+; AVX-NEXT: movw %ax, (%rdi)
+; AVX-NEXT: LBB59_2: ## %else
+; AVX-NEXT: vpextrb $2, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB59_4
+; AVX-NEXT: ## BB#3: ## %cond.store1
+; AVX-NEXT: vpextrw $1, %xmm1, 2(%rdi)
+; AVX-NEXT: LBB59_4: ## %else2
+; AVX-NEXT: vpextrb $4, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB59_6
+; AVX-NEXT: ## BB#5: ## %cond.store3
+; AVX-NEXT: vpextrw $2, %xmm1, 4(%rdi)
+; AVX-NEXT: LBB59_6: ## %else4
+; AVX-NEXT: vpextrb $6, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB59_8
+; AVX-NEXT: ## BB#7: ## %cond.store5
+; AVX-NEXT: vpextrw $3, %xmm1, 6(%rdi)
+; AVX-NEXT: LBB59_8: ## %else6
+; AVX-NEXT: vpextrb $8, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB59_10
+; AVX-NEXT: ## BB#9: ## %cond.store7
+; AVX-NEXT: vpextrw $4, %xmm1, 8(%rdi)
+; AVX-NEXT: LBB59_10: ## %else8
+; AVX-NEXT: vpextrb $10, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB59_12
+; AVX-NEXT: ## BB#11: ## %cond.store9
+; AVX-NEXT: vpextrw $5, %xmm1, 10(%rdi)
+; AVX-NEXT: LBB59_12: ## %else10
+; AVX-NEXT: vpextrb $12, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB59_14
+; AVX-NEXT: ## BB#13: ## %cond.store11
+; AVX-NEXT: vpextrw $6, %xmm1, 12(%rdi)
+; AVX-NEXT: LBB59_14: ## %else12
+; AVX-NEXT: vpextrb $14, %xmm0, %eax
+; AVX-NEXT: testb $1, %al
+; AVX-NEXT: je LBB59_16
+; AVX-NEXT: ## BB#15: ## %cond.store13
+; AVX-NEXT: vpextrw $7, %xmm1, 14(%rdi)
+; AVX-NEXT: LBB59_16: ## %else14
+; AVX-NEXT: retq
+;
+; AVX512F-LABEL: test_mask_store_8xi16:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT: vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kshiftlw $15, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB59_2
+; AVX512F-NEXT: ## BB#1: ## %cond.store
+; AVX512F-NEXT: vmovd %xmm1, %eax
+; AVX512F-NEXT: movw %ax, (%rdi)
+; AVX512F-NEXT: LBB59_2: ## %else
+; AVX512F-NEXT: kshiftlw $14, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB59_4
+; AVX512F-NEXT: ## BB#3: ## %cond.store1
+; AVX512F-NEXT: vpextrw $1, %xmm1, 2(%rdi)
+; AVX512F-NEXT: LBB59_4: ## %else2
+; AVX512F-NEXT: kshiftlw $13, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB59_6
+; AVX512F-NEXT: ## BB#5: ## %cond.store3
+; AVX512F-NEXT: vpextrw $2, %xmm1, 4(%rdi)
+; AVX512F-NEXT: LBB59_6: ## %else4
+; AVX512F-NEXT: kshiftlw $12, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB59_8
+; AVX512F-NEXT: ## BB#7: ## %cond.store5
+; AVX512F-NEXT: vpextrw $3, %xmm1, 6(%rdi)
+; AVX512F-NEXT: LBB59_8: ## %else6
+; AVX512F-NEXT: kshiftlw $11, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB59_10
+; AVX512F-NEXT: ## BB#9: ## %cond.store7
+; AVX512F-NEXT: vpextrw $4, %xmm1, 8(%rdi)
+; AVX512F-NEXT: LBB59_10: ## %else8
+; AVX512F-NEXT: kshiftlw $10, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB59_12
+; AVX512F-NEXT: ## BB#11: ## %cond.store9
+; AVX512F-NEXT: vpextrw $5, %xmm1, 10(%rdi)
+; AVX512F-NEXT: LBB59_12: ## %else10
+; AVX512F-NEXT: kshiftlw $9, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB59_14
+; AVX512F-NEXT: ## BB#13: ## %cond.store11
+; AVX512F-NEXT: vpextrw $6, %xmm1, 12(%rdi)
+; AVX512F-NEXT: LBB59_14: ## %else12
+; AVX512F-NEXT: kshiftlw $8, %k0, %k0
+; AVX512F-NEXT: kshiftrw $15, %k0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB59_16
+; AVX512F-NEXT: ## BB#15: ## %cond.store13
+; AVX512F-NEXT: vpextrw $7, %xmm1, 14(%rdi)
+; AVX512F-NEXT: LBB59_16: ## %else14
+; AVX512F-NEXT: retq
+;
; SKX-LABEL: test_mask_store_8xi16:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
@@ -2343,6 +9358,373 @@ define void @test_mask_store_8xi16(<8 x i1> %mask, <8 x i16>* %addr, <8 x i16> %
declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32, <8 x i1>)

define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i16> %val) {
+; AVX1-LABEL: test_mask_store_16xi16:
+; AVX1: ## BB#0:
+; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_2
+; AVX1-NEXT: ## BB#1: ## %cond.store
+; AVX1-NEXT: vmovd %xmm1, %eax
+; AVX1-NEXT: movw %ax, (%rdi)
+; AVX1-NEXT: LBB60_2: ## %else
+; AVX1-NEXT: vpextrb $1, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_4
+; AVX1-NEXT: ## BB#3: ## %cond.store1
+; AVX1-NEXT: vpextrw $1, %xmm1, 2(%rdi)
+; AVX1-NEXT: LBB60_4: ## %else2
+; AVX1-NEXT: vpextrb $2, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_6
+; AVX1-NEXT: ## BB#5: ## %cond.store3
+; AVX1-NEXT: vpextrw $2, %xmm1, 4(%rdi)
+; AVX1-NEXT: LBB60_6: ## %else4
+; AVX1-NEXT: vpextrb $3, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_8
+; AVX1-NEXT: ## BB#7: ## %cond.store5
+; AVX1-NEXT: vpextrw $3, %xmm1, 6(%rdi)
+; AVX1-NEXT: LBB60_8: ## %else6
+; AVX1-NEXT: vpextrb $4, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_10
+; AVX1-NEXT: ## BB#9: ## %cond.store7
+; AVX1-NEXT: vpextrw $4, %xmm1, 8(%rdi)
+; AVX1-NEXT: LBB60_10: ## %else8
+; AVX1-NEXT: vpextrb $5, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_12
+; AVX1-NEXT: ## BB#11: ## %cond.store9
+; AVX1-NEXT: vpextrw $5, %xmm1, 10(%rdi)
+; AVX1-NEXT: LBB60_12: ## %else10
+; AVX1-NEXT: vpextrb $6, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_14
+; AVX1-NEXT: ## BB#13: ## %cond.store11
+; AVX1-NEXT: vpextrw $6, %xmm1, 12(%rdi)
+; AVX1-NEXT: LBB60_14: ## %else12
+; AVX1-NEXT: vpextrb $7, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_16
+; AVX1-NEXT: ## BB#15: ## %cond.store13
+; AVX1-NEXT: vpextrw $7, %xmm1, 14(%rdi)
+; AVX1-NEXT: LBB60_16: ## %else14
+; AVX1-NEXT: vpextrb $8, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_18
+; AVX1-NEXT: ## BB#17: ## %cond.store15
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vmovd %xmm2, %eax
+; AVX1-NEXT: movw %ax, 16(%rdi)
+; AVX1-NEXT: LBB60_18: ## %else16
+; AVX1-NEXT: vpextrb $9, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_20
+; AVX1-NEXT: ## BB#19: ## %cond.store17
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpextrw $1, %xmm2, 18(%rdi)
+; AVX1-NEXT: LBB60_20: ## %else18
+; AVX1-NEXT: vpextrb $10, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_22
+; AVX1-NEXT: ## BB#21: ## %cond.store19
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpextrw $2, %xmm2, 20(%rdi)
+; AVX1-NEXT: LBB60_22: ## %else20
+; AVX1-NEXT: vpextrb $11, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_24
+; AVX1-NEXT: ## BB#23: ## %cond.store21
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpextrw $3, %xmm2, 22(%rdi)
+; AVX1-NEXT: LBB60_24: ## %else22
+; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_26
+; AVX1-NEXT: ## BB#25: ## %cond.store23
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpextrw $4, %xmm2, 24(%rdi)
+; AVX1-NEXT: LBB60_26: ## %else24
+; AVX1-NEXT: vpextrb $13, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_28
+; AVX1-NEXT: ## BB#27: ## %cond.store25
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpextrw $5, %xmm2, 26(%rdi)
+; AVX1-NEXT: LBB60_28: ## %else26
+; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_30
+; AVX1-NEXT: ## BB#29: ## %cond.store27
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpextrw $6, %xmm2, 28(%rdi)
+; AVX1-NEXT: LBB60_30: ## %else28
+; AVX1-NEXT: vpextrb $15, %xmm0, %eax
+; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: je LBB60_32
+; AVX1-NEXT: ## BB#31: ## %cond.store29
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
+; AVX1-NEXT: vpextrw $7, %xmm0, 30(%rdi)
+; AVX1-NEXT: LBB60_32: ## %else30
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_mask_store_16xi16:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_2
+; AVX2-NEXT: ## BB#1: ## %cond.store
+; AVX2-NEXT: vmovd %xmm1, %eax
+; AVX2-NEXT: movw %ax, (%rdi)
+; AVX2-NEXT: LBB60_2: ## %else
+; AVX2-NEXT: vpextrb $1, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_4
+; AVX2-NEXT: ## BB#3: ## %cond.store1
+; AVX2-NEXT: vpextrw $1, %xmm1, 2(%rdi)
+; AVX2-NEXT: LBB60_4: ## %else2
+; AVX2-NEXT: vpextrb $2, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_6
+; AVX2-NEXT: ## BB#5: ## %cond.store3
+; AVX2-NEXT: vpextrw $2, %xmm1, 4(%rdi)
+; AVX2-NEXT: LBB60_6: ## %else4
+; AVX2-NEXT: vpextrb $3, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_8
+; AVX2-NEXT: ## BB#7: ## %cond.store5
+; AVX2-NEXT: vpextrw $3, %xmm1, 6(%rdi)
+; AVX2-NEXT: LBB60_8: ## %else6
+; AVX2-NEXT: vpextrb $4, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_10
+; AVX2-NEXT: ## BB#9: ## %cond.store7
+; AVX2-NEXT: vpextrw $4, %xmm1, 8(%rdi)
+; AVX2-NEXT: LBB60_10: ## %else8
+; AVX2-NEXT: vpextrb $5, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_12
+; AVX2-NEXT: ## BB#11: ## %cond.store9
+; AVX2-NEXT: vpextrw $5, %xmm1, 10(%rdi)
+; AVX2-NEXT: LBB60_12: ## %else10
+; AVX2-NEXT: vpextrb $6, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_14
+; AVX2-NEXT: ## BB#13: ## %cond.store11
+; AVX2-NEXT: vpextrw $6, %xmm1, 12(%rdi)
+; AVX2-NEXT: LBB60_14: ## %else12
+; AVX2-NEXT: vpextrb $7, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_16
+; AVX2-NEXT: ## BB#15: ## %cond.store13
+; AVX2-NEXT: vpextrw $7, %xmm1, 14(%rdi)
+; AVX2-NEXT: LBB60_16: ## %else14
+; AVX2-NEXT: vpextrb $8, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_18
+; AVX2-NEXT: ## BB#17: ## %cond.store15
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vmovd %xmm2, %eax
+; AVX2-NEXT: movw %ax, 16(%rdi)
+; AVX2-NEXT: LBB60_18: ## %else16
+; AVX2-NEXT: vpextrb $9, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_20
+; AVX2-NEXT: ## BB#19: ## %cond.store17
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpextrw $1, %xmm2, 18(%rdi)
+; AVX2-NEXT: LBB60_20: ## %else18
+; AVX2-NEXT: vpextrb $10, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_22
+; AVX2-NEXT: ## BB#21: ## %cond.store19
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpextrw $2, %xmm2, 20(%rdi)
+; AVX2-NEXT: LBB60_22: ## %else20
+; AVX2-NEXT: vpextrb $11, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_24
+; AVX2-NEXT: ## BB#23: ## %cond.store21
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpextrw $3, %xmm2, 22(%rdi)
+; AVX2-NEXT: LBB60_24: ## %else22
+; AVX2-NEXT: vpextrb $12, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_26
+; AVX2-NEXT: ## BB#25: ## %cond.store23
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpextrw $4, %xmm2, 24(%rdi)
+; AVX2-NEXT: LBB60_26: ## %else24
+; AVX2-NEXT: vpextrb $13, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_28
+; AVX2-NEXT: ## BB#27: ## %cond.store25
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpextrw $5, %xmm2, 26(%rdi)
+; AVX2-NEXT: LBB60_28: ## %else26
+; AVX2-NEXT: vpextrb $14, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_30
+; AVX2-NEXT: ## BB#29: ## %cond.store27
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpextrw $6, %xmm2, 28(%rdi)
+; AVX2-NEXT: LBB60_30: ## %else28
+; AVX2-NEXT: vpextrb $15, %xmm0, %eax
+; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: je LBB60_32
+; AVX2-NEXT: ## BB#31: ## %cond.store29
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT: vpextrw $7, %xmm0, 30(%rdi)
+; AVX2-NEXT: LBB60_32: ## %else30
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_mask_store_16xi16:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kshiftlw $15, %k0, %k1
+; AVX512F-NEXT: kshiftrw $15, %k1, %k1
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: testb %al, %al
+; AVX512F-NEXT: je LBB60_2
+; AVX512F-NEXT: ## BB#1: ## %cond.store
vmovd %xmm1, %eax +; AVX512F-NEXT: movw %ax, (%rdi) +; AVX512F-NEXT: LBB60_2: ## %else +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_4 +; AVX512F-NEXT: ## BB#3: ## %cond.store1 +; AVX512F-NEXT: vpextrw $1, %xmm1, 2(%rdi) +; AVX512F-NEXT: LBB60_4: ## %else2 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_6 +; AVX512F-NEXT: ## BB#5: ## %cond.store3 +; AVX512F-NEXT: vpextrw $2, %xmm1, 4(%rdi) +; AVX512F-NEXT: LBB60_6: ## %else4 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_8 +; AVX512F-NEXT: ## BB#7: ## %cond.store5 +; AVX512F-NEXT: vpextrw $3, %xmm1, 6(%rdi) +; AVX512F-NEXT: LBB60_8: ## %else6 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_10 +; AVX512F-NEXT: ## BB#9: ## %cond.store7 +; AVX512F-NEXT: vpextrw $4, %xmm1, 8(%rdi) +; AVX512F-NEXT: LBB60_10: ## %else8 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_12 +; AVX512F-NEXT: ## BB#11: ## %cond.store9 +; AVX512F-NEXT: vpextrw $5, %xmm1, 10(%rdi) +; AVX512F-NEXT: LBB60_12: ## %else10 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_14 +; AVX512F-NEXT: ## BB#13: ## %cond.store11 +; AVX512F-NEXT: vpextrw $6, %xmm1, 12(%rdi) +; AVX512F-NEXT: LBB60_14: ## %else12 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_16 +; AVX512F-NEXT: ## BB#15: ## %cond.store13 +; AVX512F-NEXT: vpextrw $7, %xmm1, 14(%rdi) +; AVX512F-NEXT: LBB60_16: ## %else14 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_18 +; AVX512F-NEXT: ## BB#17: ## %cond.store15 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vmovd %xmm0, %eax +; AVX512F-NEXT: movw %ax, 16(%rdi) +; AVX512F-NEXT: LBB60_18: ## %else16 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_20 +; AVX512F-NEXT: ## BB#19: ## %cond.store17 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vpextrw $1, %xmm0, 18(%rdi) +; AVX512F-NEXT: LBB60_20: ## %else18 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_22 +; AVX512F-NEXT: ## BB#21: ## %cond.store19 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vpextrw $2, %xmm0, 20(%rdi) +; AVX512F-NEXT: LBB60_22: ## %else20 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_24 +; AVX512F-NEXT: ## BB#23: ## %cond.store21 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vpextrw $3, %xmm0, 22(%rdi) +; 
AVX512F-NEXT: LBB60_24: ## %else22 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_26 +; AVX512F-NEXT: ## BB#25: ## %cond.store23 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vpextrw $4, %xmm0, 24(%rdi) +; AVX512F-NEXT: LBB60_26: ## %else24 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_28 +; AVX512F-NEXT: ## BB#27: ## %cond.store25 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vpextrw $5, %xmm0, 26(%rdi) +; AVX512F-NEXT: LBB60_28: ## %else26 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_30 +; AVX512F-NEXT: ## BB#29: ## %cond.store27 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vpextrw $6, %xmm0, 28(%rdi) +; AVX512F-NEXT: LBB60_30: ## %else28 +; AVX512F-NEXT: kshiftlw $0, %k0, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: testb %al, %al +; AVX512F-NEXT: je LBB60_32 +; AVX512F-NEXT: ## BB#31: ## %cond.store29 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 +; AVX512F-NEXT: vpextrw $7, %xmm0, 30(%rdi) +; AVX512F-NEXT: LBB60_32: ## %else30 +; AVX512F-NEXT: retq +; ; SKX-LABEL: test_mask_store_16xi16: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 @@ -2355,6 +9737,659 @@ define void @test_mask_store_16xi16(<16 x i1> %mask, <16 x i16>* %addr, <16 x i1 declare void @llvm.masked.store.v16i16.p0v16i16(<16 x i16>, <16 x i16>*, i32, <16 x i1>) define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) { +; AVX1-LABEL: test_mask_store_32xi16: +; AVX1: ## BB#0: +; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_2 +; AVX1-NEXT: ## BB#1: ## %cond.store +; AVX1-NEXT: vmovd %xmm1, %eax +; AVX1-NEXT: movw %ax, (%rdi) +; AVX1-NEXT: LBB61_2: ## %else +; AVX1-NEXT: vpextrb $1, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_4 +; AVX1-NEXT: ## BB#3: ## %cond.store1 +; AVX1-NEXT: vpextrw $1, %xmm1, 2(%rdi) +; AVX1-NEXT: LBB61_4: ## %else2 +; AVX1-NEXT: vpextrb $2, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_6 +; AVX1-NEXT: ## BB#5: ## %cond.store3 +; AVX1-NEXT: vpextrw $2, %xmm1, 4(%rdi) +; AVX1-NEXT: LBB61_6: ## %else4 +; AVX1-NEXT: vpextrb $3, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_8 +; AVX1-NEXT: ## BB#7: ## %cond.store5 +; AVX1-NEXT: vpextrw $3, %xmm1, 6(%rdi) +; AVX1-NEXT: LBB61_8: ## %else6 +; AVX1-NEXT: vpextrb $4, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_10 +; AVX1-NEXT: ## BB#9: ## %cond.store7 +; AVX1-NEXT: vpextrw $4, %xmm1, 8(%rdi) +; AVX1-NEXT: LBB61_10: ## %else8 +; AVX1-NEXT: vpextrb $5, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_12 +; AVX1-NEXT: ## BB#11: ## %cond.store9 +; AVX1-NEXT: vpextrw $5, %xmm1, 10(%rdi) +; AVX1-NEXT: LBB61_12: ## %else10 +; AVX1-NEXT: vpextrb $6, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_14 +; AVX1-NEXT: ## BB#13: ## %cond.store11 +; AVX1-NEXT: vpextrw $6, %xmm1, 12(%rdi) +; AVX1-NEXT: LBB61_14: ## %else12 +; AVX1-NEXT: vpextrb $7, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_16 +; AVX1-NEXT: ## BB#15: ## %cond.store13 +; AVX1-NEXT: vpextrw $7, %xmm1, 14(%rdi) +; AVX1-NEXT: LBB61_16: ## %else14 +; 
AVX1-NEXT: vpextrb $8, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_18 +; AVX1-NEXT: ## BB#17: ## %cond.store15 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vmovd %xmm3, %eax +; AVX1-NEXT: movw %ax, 16(%rdi) +; AVX1-NEXT: LBB61_18: ## %else16 +; AVX1-NEXT: vpextrb $9, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_20 +; AVX1-NEXT: ## BB#19: ## %cond.store17 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpextrw $1, %xmm3, 18(%rdi) +; AVX1-NEXT: LBB61_20: ## %else18 +; AVX1-NEXT: vpextrb $10, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_22 +; AVX1-NEXT: ## BB#21: ## %cond.store19 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpextrw $2, %xmm3, 20(%rdi) +; AVX1-NEXT: LBB61_22: ## %else20 +; AVX1-NEXT: vpextrb $11, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_24 +; AVX1-NEXT: ## BB#23: ## %cond.store21 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpextrw $3, %xmm3, 22(%rdi) +; AVX1-NEXT: LBB61_24: ## %else22 +; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_26 +; AVX1-NEXT: ## BB#25: ## %cond.store23 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpextrw $4, %xmm3, 24(%rdi) +; AVX1-NEXT: LBB61_26: ## %else24 +; AVX1-NEXT: vpextrb $13, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_28 +; AVX1-NEXT: ## BB#27: ## %cond.store25 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpextrw $5, %xmm3, 26(%rdi) +; AVX1-NEXT: LBB61_28: ## %else26 +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_30 +; AVX1-NEXT: ## BB#29: ## %cond.store27 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpextrw $6, %xmm3, 28(%rdi) +; AVX1-NEXT: LBB61_30: ## %else28 +; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_32 +; AVX1-NEXT: ## BB#31: ## %cond.store29 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 +; AVX1-NEXT: vpextrw $7, %xmm1, 30(%rdi) +; AVX1-NEXT: LBB61_32: ## %else30 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_34 +; AVX1-NEXT: ## BB#33: ## %cond.store31 +; AVX1-NEXT: vmovd %xmm2, %eax +; AVX1-NEXT: movw %ax, 32(%rdi) +; AVX1-NEXT: LBB61_34: ## %else32 +; AVX1-NEXT: vpextrb $1, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_36 +; AVX1-NEXT: ## BB#35: ## %cond.store33 +; AVX1-NEXT: vpextrw $1, %xmm2, 34(%rdi) +; AVX1-NEXT: LBB61_36: ## %else34 +; AVX1-NEXT: vpextrb $2, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_38 +; AVX1-NEXT: ## BB#37: ## %cond.store35 +; AVX1-NEXT: vpextrw $2, %xmm2, 36(%rdi) +; AVX1-NEXT: LBB61_38: ## %else36 +; AVX1-NEXT: vpextrb $3, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_40 +; AVX1-NEXT: ## BB#39: ## %cond.store37 +; AVX1-NEXT: vpextrw $3, %xmm2, 38(%rdi) +; AVX1-NEXT: LBB61_40: ## %else38 +; AVX1-NEXT: vpextrb $4, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_42 +; AVX1-NEXT: ## BB#41: ## %cond.store39 +; AVX1-NEXT: vpextrw $4, %xmm2, 40(%rdi) +; AVX1-NEXT: LBB61_42: ## %else40 +; AVX1-NEXT: vpextrb $5, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_44 +; AVX1-NEXT: ## BB#43: ## %cond.store41 +; AVX1-NEXT: vpextrw $5, %xmm2, 42(%rdi) +; AVX1-NEXT: LBB61_44: ## %else42 +; AVX1-NEXT: vpextrb $6, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_46 +; AVX1-NEXT: ## BB#45: ## %cond.store43 +; AVX1-NEXT: vpextrw $6, %xmm2, 
44(%rdi) +; AVX1-NEXT: LBB61_46: ## %else44 +; AVX1-NEXT: vpextrb $7, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_48 +; AVX1-NEXT: ## BB#47: ## %cond.store45 +; AVX1-NEXT: vpextrw $7, %xmm2, 46(%rdi) +; AVX1-NEXT: LBB61_48: ## %else46 +; AVX1-NEXT: vpextrb $8, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_50 +; AVX1-NEXT: ## BB#49: ## %cond.store47 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vmovd %xmm1, %eax +; AVX1-NEXT: movw %ax, 48(%rdi) +; AVX1-NEXT: LBB61_50: ## %else48 +; AVX1-NEXT: vpextrb $9, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_52 +; AVX1-NEXT: ## BB#51: ## %cond.store49 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vpextrw $1, %xmm1, 50(%rdi) +; AVX1-NEXT: LBB61_52: ## %else50 +; AVX1-NEXT: vpextrb $10, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_54 +; AVX1-NEXT: ## BB#53: ## %cond.store51 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vpextrw $2, %xmm1, 52(%rdi) +; AVX1-NEXT: LBB61_54: ## %else52 +; AVX1-NEXT: vpextrb $11, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_56 +; AVX1-NEXT: ## BB#55: ## %cond.store53 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vpextrw $3, %xmm1, 54(%rdi) +; AVX1-NEXT: LBB61_56: ## %else54 +; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_58 +; AVX1-NEXT: ## BB#57: ## %cond.store55 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vpextrw $4, %xmm1, 56(%rdi) +; AVX1-NEXT: LBB61_58: ## %else56 +; AVX1-NEXT: vpextrb $13, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_60 +; AVX1-NEXT: ## BB#59: ## %cond.store57 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vpextrw $5, %xmm1, 58(%rdi) +; AVX1-NEXT: LBB61_60: ## %else58 +; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_62 +; AVX1-NEXT: ## BB#61: ## %cond.store59 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm1 +; AVX1-NEXT: vpextrw $6, %xmm1, 60(%rdi) +; AVX1-NEXT: LBB61_62: ## %else60 +; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: je LBB61_64 +; AVX1-NEXT: ## BB#63: ## %cond.store61 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm0 +; AVX1-NEXT: vpextrw $7, %xmm0, 62(%rdi) +; AVX1-NEXT: LBB61_64: ## %else62 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: test_mask_store_32xi16: +; AVX2: ## BB#0: +; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_2 +; AVX2-NEXT: ## BB#1: ## %cond.store +; AVX2-NEXT: vmovd %xmm1, %eax +; AVX2-NEXT: movw %ax, (%rdi) +; AVX2-NEXT: LBB61_2: ## %else +; AVX2-NEXT: vpextrb $1, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_4 +; AVX2-NEXT: ## BB#3: ## %cond.store1 +; AVX2-NEXT: vpextrw $1, %xmm1, 2(%rdi) +; AVX2-NEXT: LBB61_4: ## %else2 +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_6 +; AVX2-NEXT: ## BB#5: ## %cond.store3 +; AVX2-NEXT: vpextrw $2, %xmm1, 4(%rdi) +; AVX2-NEXT: LBB61_6: ## %else4 +; AVX2-NEXT: vpextrb $3, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_8 +; AVX2-NEXT: ## BB#7: ## %cond.store5 +; AVX2-NEXT: vpextrw $3, %xmm1, 6(%rdi) +; AVX2-NEXT: LBB61_8: ## %else6 +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_10 +; AVX2-NEXT: ## BB#9: ## %cond.store7 +; AVX2-NEXT: vpextrw $4, %xmm1, 8(%rdi) +; AVX2-NEXT: LBB61_10: ## %else8 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_12 
+; AVX2-NEXT: ## BB#11: ## %cond.store9 +; AVX2-NEXT: vpextrw $5, %xmm1, 10(%rdi) +; AVX2-NEXT: LBB61_12: ## %else10 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_14 +; AVX2-NEXT: ## BB#13: ## %cond.store11 +; AVX2-NEXT: vpextrw $6, %xmm1, 12(%rdi) +; AVX2-NEXT: LBB61_14: ## %else12 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_16 +; AVX2-NEXT: ## BB#15: ## %cond.store13 +; AVX2-NEXT: vpextrw $7, %xmm1, 14(%rdi) +; AVX2-NEXT: LBB61_16: ## %else14 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_18 +; AVX2-NEXT: ## BB#17: ## %cond.store15 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vmovd %xmm3, %eax +; AVX2-NEXT: movw %ax, 16(%rdi) +; AVX2-NEXT: LBB61_18: ## %else16 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_20 +; AVX2-NEXT: ## BB#19: ## %cond.store17 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpextrw $1, %xmm3, 18(%rdi) +; AVX2-NEXT: LBB61_20: ## %else18 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_22 +; AVX2-NEXT: ## BB#21: ## %cond.store19 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpextrw $2, %xmm3, 20(%rdi) +; AVX2-NEXT: LBB61_22: ## %else20 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_24 +; AVX2-NEXT: ## BB#23: ## %cond.store21 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpextrw $3, %xmm3, 22(%rdi) +; AVX2-NEXT: LBB61_24: ## %else22 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_26 +; AVX2-NEXT: ## BB#25: ## %cond.store23 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpextrw $4, %xmm3, 24(%rdi) +; AVX2-NEXT: LBB61_26: ## %else24 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_28 +; AVX2-NEXT: ## BB#27: ## %cond.store25 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpextrw $5, %xmm3, 26(%rdi) +; AVX2-NEXT: LBB61_28: ## %else26 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_30 +; AVX2-NEXT: ## BB#29: ## %cond.store27 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpextrw $6, %xmm3, 28(%rdi) +; AVX2-NEXT: LBB61_30: ## %else28 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_32 +; AVX2-NEXT: ## BB#31: ## %cond.store29 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX2-NEXT: vpextrw $7, %xmm1, 30(%rdi) +; AVX2-NEXT: LBB61_32: ## %else30 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_34 +; AVX2-NEXT: ## BB#33: ## %cond.store31 +; AVX2-NEXT: vmovd %xmm2, %eax +; AVX2-NEXT: movw %ax, 32(%rdi) +; AVX2-NEXT: LBB61_34: ## %else32 +; AVX2-NEXT: vpextrb $1, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_36 +; AVX2-NEXT: ## BB#35: ## %cond.store33 +; AVX2-NEXT: vpextrw $1, %xmm2, 34(%rdi) +; AVX2-NEXT: LBB61_36: ## %else34 +; AVX2-NEXT: vpextrb $2, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_38 +; AVX2-NEXT: ## BB#37: ## %cond.store35 +; AVX2-NEXT: vpextrw $2, %xmm2, 36(%rdi) +; AVX2-NEXT: LBB61_38: ## %else36 +; AVX2-NEXT: vpextrb $3, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_40 +; AVX2-NEXT: ## BB#39: ## %cond.store37 +; AVX2-NEXT: vpextrw $3, %xmm2, 38(%rdi) +; AVX2-NEXT: LBB61_40: ## %else38 +; AVX2-NEXT: vpextrb $4, %xmm0, %eax +; 
AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_42 +; AVX2-NEXT: ## BB#41: ## %cond.store39 +; AVX2-NEXT: vpextrw $4, %xmm2, 40(%rdi) +; AVX2-NEXT: LBB61_42: ## %else40 +; AVX2-NEXT: vpextrb $5, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_44 +; AVX2-NEXT: ## BB#43: ## %cond.store41 +; AVX2-NEXT: vpextrw $5, %xmm2, 42(%rdi) +; AVX2-NEXT: LBB61_44: ## %else42 +; AVX2-NEXT: vpextrb $6, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_46 +; AVX2-NEXT: ## BB#45: ## %cond.store43 +; AVX2-NEXT: vpextrw $6, %xmm2, 44(%rdi) +; AVX2-NEXT: LBB61_46: ## %else44 +; AVX2-NEXT: vpextrb $7, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_48 +; AVX2-NEXT: ## BB#47: ## %cond.store45 +; AVX2-NEXT: vpextrw $7, %xmm2, 46(%rdi) +; AVX2-NEXT: LBB61_48: ## %else46 +; AVX2-NEXT: vpextrb $8, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_50 +; AVX2-NEXT: ## BB#49: ## %cond.store47 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX2-NEXT: vmovd %xmm1, %eax +; AVX2-NEXT: movw %ax, 48(%rdi) +; AVX2-NEXT: LBB61_50: ## %else48 +; AVX2-NEXT: vpextrb $9, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_52 +; AVX2-NEXT: ## BB#51: ## %cond.store49 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX2-NEXT: vpextrw $1, %xmm1, 50(%rdi) +; AVX2-NEXT: LBB61_52: ## %else50 +; AVX2-NEXT: vpextrb $10, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_54 +; AVX2-NEXT: ## BB#53: ## %cond.store51 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX2-NEXT: vpextrw $2, %xmm1, 52(%rdi) +; AVX2-NEXT: LBB61_54: ## %else52 +; AVX2-NEXT: vpextrb $11, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_56 +; AVX2-NEXT: ## BB#55: ## %cond.store53 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX2-NEXT: vpextrw $3, %xmm1, 54(%rdi) +; AVX2-NEXT: LBB61_56: ## %else54 +; AVX2-NEXT: vpextrb $12, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_58 +; AVX2-NEXT: ## BB#57: ## %cond.store55 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX2-NEXT: vpextrw $4, %xmm1, 56(%rdi) +; AVX2-NEXT: LBB61_58: ## %else56 +; AVX2-NEXT: vpextrb $13, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_60 +; AVX2-NEXT: ## BB#59: ## %cond.store57 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX2-NEXT: vpextrw $5, %xmm1, 58(%rdi) +; AVX2-NEXT: LBB61_60: ## %else58 +; AVX2-NEXT: vpextrb $14, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_62 +; AVX2-NEXT: ## BB#61: ## %cond.store59 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX2-NEXT: vpextrw $6, %xmm1, 60(%rdi) +; AVX2-NEXT: LBB61_62: ## %else60 +; AVX2-NEXT: vpextrb $15, %xmm0, %eax +; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: je LBB61_64 +; AVX2-NEXT: ## BB#63: ## %cond.store61 +; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm0 +; AVX2-NEXT: vpextrw $7, %xmm0, 62(%rdi) +; AVX2-NEXT: LBB61_64: ## %else62 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: test_mask_store_32xi16: +; AVX512F: ## BB#0: +; AVX512F-NEXT: vpextrb $0, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_2 +; AVX512F-NEXT: ## BB#1: ## %cond.store +; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: movw %ax, (%rdi) +; AVX512F-NEXT: LBB61_2: ## %else +; AVX512F-NEXT: vpextrb $1, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_4 +; AVX512F-NEXT: ## BB#3: ## %cond.store1 +; AVX512F-NEXT: vpextrw $1, %xmm1, 2(%rdi) +; AVX512F-NEXT: LBB61_4: ## %else2 +; AVX512F-NEXT: vpextrb $2, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_6 +; AVX512F-NEXT: 
## BB#5: ## %cond.store3 +; AVX512F-NEXT: vpextrw $2, %xmm1, 4(%rdi) +; AVX512F-NEXT: LBB61_6: ## %else4 +; AVX512F-NEXT: vpextrb $3, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_8 +; AVX512F-NEXT: ## BB#7: ## %cond.store5 +; AVX512F-NEXT: vpextrw $3, %xmm1, 6(%rdi) +; AVX512F-NEXT: LBB61_8: ## %else6 +; AVX512F-NEXT: vpextrb $4, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_10 +; AVX512F-NEXT: ## BB#9: ## %cond.store7 +; AVX512F-NEXT: vpextrw $4, %xmm1, 8(%rdi) +; AVX512F-NEXT: LBB61_10: ## %else8 +; AVX512F-NEXT: vpextrb $5, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_12 +; AVX512F-NEXT: ## BB#11: ## %cond.store9 +; AVX512F-NEXT: vpextrw $5, %xmm1, 10(%rdi) +; AVX512F-NEXT: LBB61_12: ## %else10 +; AVX512F-NEXT: vpextrb $6, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_14 +; AVX512F-NEXT: ## BB#13: ## %cond.store11 +; AVX512F-NEXT: vpextrw $6, %xmm1, 12(%rdi) +; AVX512F-NEXT: LBB61_14: ## %else12 +; AVX512F-NEXT: vpextrb $7, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_16 +; AVX512F-NEXT: ## BB#15: ## %cond.store13 +; AVX512F-NEXT: vpextrw $7, %xmm1, 14(%rdi) +; AVX512F-NEXT: LBB61_16: ## %else14 +; AVX512F-NEXT: vpextrb $8, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_18 +; AVX512F-NEXT: ## BB#17: ## %cond.store15 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vmovd %xmm3, %eax +; AVX512F-NEXT: movw %ax, 16(%rdi) +; AVX512F-NEXT: LBB61_18: ## %else16 +; AVX512F-NEXT: vpextrb $9, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_20 +; AVX512F-NEXT: ## BB#19: ## %cond.store17 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpextrw $1, %xmm3, 18(%rdi) +; AVX512F-NEXT: LBB61_20: ## %else18 +; AVX512F-NEXT: vpextrb $10, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_22 +; AVX512F-NEXT: ## BB#21: ## %cond.store19 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpextrw $2, %xmm3, 20(%rdi) +; AVX512F-NEXT: LBB61_22: ## %else20 +; AVX512F-NEXT: vpextrb $11, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_24 +; AVX512F-NEXT: ## BB#23: ## %cond.store21 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpextrw $3, %xmm3, 22(%rdi) +; AVX512F-NEXT: LBB61_24: ## %else22 +; AVX512F-NEXT: vpextrb $12, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_26 +; AVX512F-NEXT: ## BB#25: ## %cond.store23 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpextrw $4, %xmm3, 24(%rdi) +; AVX512F-NEXT: LBB61_26: ## %else24 +; AVX512F-NEXT: vpextrb $13, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_28 +; AVX512F-NEXT: ## BB#27: ## %cond.store25 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpextrw $5, %xmm3, 26(%rdi) +; AVX512F-NEXT: LBB61_28: ## %else26 +; AVX512F-NEXT: vpextrb $14, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_30 +; AVX512F-NEXT: ## BB#29: ## %cond.store27 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX512F-NEXT: vpextrw $6, %xmm3, 28(%rdi) +; AVX512F-NEXT: LBB61_30: ## %else28 +; AVX512F-NEXT: vpextrb $15, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_32 +; AVX512F-NEXT: ## BB#31: ## %cond.store29 +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX512F-NEXT: vpextrw $7, %xmm1, 30(%rdi) +; AVX512F-NEXT: LBB61_32: ## %else30 +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vpextrb $0, %xmm0, %eax 
+; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_34 +; AVX512F-NEXT: ## BB#33: ## %cond.store31 +; AVX512F-NEXT: vmovd %xmm2, %eax +; AVX512F-NEXT: movw %ax, 32(%rdi) +; AVX512F-NEXT: LBB61_34: ## %else32 +; AVX512F-NEXT: vpextrb $1, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_36 +; AVX512F-NEXT: ## BB#35: ## %cond.store33 +; AVX512F-NEXT: vpextrw $1, %xmm2, 34(%rdi) +; AVX512F-NEXT: LBB61_36: ## %else34 +; AVX512F-NEXT: vpextrb $2, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_38 +; AVX512F-NEXT: ## BB#37: ## %cond.store35 +; AVX512F-NEXT: vpextrw $2, %xmm2, 36(%rdi) +; AVX512F-NEXT: LBB61_38: ## %else36 +; AVX512F-NEXT: vpextrb $3, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_40 +; AVX512F-NEXT: ## BB#39: ## %cond.store37 +; AVX512F-NEXT: vpextrw $3, %xmm2, 38(%rdi) +; AVX512F-NEXT: LBB61_40: ## %else38 +; AVX512F-NEXT: vpextrb $4, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_42 +; AVX512F-NEXT: ## BB#41: ## %cond.store39 +; AVX512F-NEXT: vpextrw $4, %xmm2, 40(%rdi) +; AVX512F-NEXT: LBB61_42: ## %else40 +; AVX512F-NEXT: vpextrb $5, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_44 +; AVX512F-NEXT: ## BB#43: ## %cond.store41 +; AVX512F-NEXT: vpextrw $5, %xmm2, 42(%rdi) +; AVX512F-NEXT: LBB61_44: ## %else42 +; AVX512F-NEXT: vpextrb $6, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_46 +; AVX512F-NEXT: ## BB#45: ## %cond.store43 +; AVX512F-NEXT: vpextrw $6, %xmm2, 44(%rdi) +; AVX512F-NEXT: LBB61_46: ## %else44 +; AVX512F-NEXT: vpextrb $7, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_48 +; AVX512F-NEXT: ## BB#47: ## %cond.store45 +; AVX512F-NEXT: vpextrw $7, %xmm2, 46(%rdi) +; AVX512F-NEXT: LBB61_48: ## %else46 +; AVX512F-NEXT: vpextrb $8, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_50 +; AVX512F-NEXT: ## BB#49: ## %cond.store47 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX512F-NEXT: vmovd %xmm1, %eax +; AVX512F-NEXT: movw %ax, 48(%rdi) +; AVX512F-NEXT: LBB61_50: ## %else48 +; AVX512F-NEXT: vpextrb $9, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_52 +; AVX512F-NEXT: ## BB#51: ## %cond.store49 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX512F-NEXT: vpextrw $1, %xmm1, 50(%rdi) +; AVX512F-NEXT: LBB61_52: ## %else50 +; AVX512F-NEXT: vpextrb $10, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_54 +; AVX512F-NEXT: ## BB#53: ## %cond.store51 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX512F-NEXT: vpextrw $2, %xmm1, 52(%rdi) +; AVX512F-NEXT: LBB61_54: ## %else52 +; AVX512F-NEXT: vpextrb $11, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_56 +; AVX512F-NEXT: ## BB#55: ## %cond.store53 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX512F-NEXT: vpextrw $3, %xmm1, 54(%rdi) +; AVX512F-NEXT: LBB61_56: ## %else54 +; AVX512F-NEXT: vpextrb $12, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_58 +; AVX512F-NEXT: ## BB#57: ## %cond.store55 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX512F-NEXT: vpextrw $4, %xmm1, 56(%rdi) +; AVX512F-NEXT: LBB61_58: ## %else56 +; AVX512F-NEXT: vpextrb $13, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_60 +; AVX512F-NEXT: ## BB#59: ## %cond.store57 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX512F-NEXT: vpextrw $5, %xmm1, 58(%rdi) +; AVX512F-NEXT: LBB61_60: ## %else58 +; AVX512F-NEXT: vpextrb $14, %xmm0, %eax +; AVX512F-NEXT: testb 
$1, %al +; AVX512F-NEXT: je LBB61_62 +; AVX512F-NEXT: ## BB#61: ## %cond.store59 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm1 +; AVX512F-NEXT: vpextrw $6, %xmm1, 60(%rdi) +; AVX512F-NEXT: LBB61_62: ## %else60 +; AVX512F-NEXT: vpextrb $15, %xmm0, %eax +; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: je LBB61_64 +; AVX512F-NEXT: ## BB#63: ## %cond.store61 +; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm0 +; AVX512F-NEXT: vpextrw $7, %xmm0, 62(%rdi) +; AVX512F-NEXT: LBB61_64: ## %else62 +; AVX512F-NEXT: retq +; ; SKX-LABEL: test_mask_store_32xi16: ; SKX: ## BB#0: ; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 |
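Every test in this file has the same shape: a single call to the llvm.masked.store intrinsic whose per-target assertions are regenerated by utils/update_llc_test_checks.py. As a reading aid, here is a minimal sketch of the IR behind test_mask_store_32xi16, reconstructed from the define and declare lines visible in the hunk above; the i32 4 alignment operand is an assumption, since the diff does not show the test body itself:

define void @test_mask_store_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32 x i16> %val) {
  ; write only the lanes of %val whose mask bit is set; memory under the
  ; disabled lanes must be left untouched, which is why the scalarized
  ; lowering branches around every individual vpextrw store
  call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> %val, <32 x i16>* %addr, i32 4, <32 x i1> %mask)
  ret void
}
declare void @llvm.masked.store.v32i16.p0v32i16(<32 x i16>, <32 x i16>*, i32, <32 x i1>)

The AVX1, AVX2, and AVX512F runs have no masked-store instruction for 16-bit elements, so legalization expands the intrinsic into the per-lane test/branch/vpextrw sequences checked above; only the SKX run (AVX512BW) can move the mask into a k register (the vpsllw $7 above feeds that) and keep the whole operation as a single masked store, which is why its check block is so much shorter.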