diff options
author | Anton Afanasyev <anton.a.afanasyev@gmail.com> | 2019-05-22 07:41:34 +0000 |
---|---|---|
committer | Anton Afanasyev <anton.a.afanasyev@gmail.com> | 2019-05-22 07:41:34 +0000 |
commit | df00c6a54f274ca06775890774b23eee87e8a170 (patch) | |
tree | 2e4f6da3b297e5c7c74fad7157fadccb8cb8b183 /llvm/test/CodeGen/X86/masked_store.ll | |
parent | 1c61471ab1cba735f57824ec8a38db845e60ab84 (diff) | |
download | bcm5719-llvm-df00c6a54f274ca06775890774b23eee87e8a170.tar.gz bcm5719-llvm-df00c6a54f274ca06775890774b23eee87e8a170.zip |
[MIR] Add simple PRE pass to MachineCSE
This is the second part of the commit fixing PR38917 (hoisting
partially redundant machine instructions). Most of PRE (partial
redundancy elimination) and CSE work is done on LLVM IR, but some
redundancy arises during DAG legalization. Machine CSE is not enough
to deal with it. This simple PRE implementation works a little bit
intricately: it runs before CSE, looking for partial redundancy
and transforming it into full redundancy, anticipating that the next
CSE step will eliminate this created redundancy. If CSE doesn't
eliminate it, then the created instruction will remain dead and be
eliminated later by the Remove Dead Machine Instructions pass.
The third part of the commit is supposed to refactor MachineCSE,
to make it clearer and to merge MachinePRE with MachineCSE,
so one need not rely on the later Remove Dead pass to clear
instructions not eliminated by CSE.
First step: https://reviews.llvm.org/D54839
Fixes llvm.org/PR38917
llvm-svn: 361356
Diffstat (limited to 'llvm/test/CodeGen/X86/masked_store.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/masked_store.ll | 722 |
1 files changed, 305 insertions, 417 deletions
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll index efbb1ef8cc6..07a4fd96b2f 100644 --- a/llvm/test/CodeGen/X86/masked_store.ll +++ b/llvm/test/CodeGen/X86/masked_store.ll @@ -2151,69 +2151,62 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX1-NEXT: ## %bb.15: ## %cond.store13 ; AVX1-NEXT: vpextrw $7, %xmm1, 14(%rdi) ; AVX1-NEXT: LBB14_16: ## %else14 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $0, %xmm2, %eax +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqw %xmm0, %xmm2, %xmm3 +; AVX1-NEXT: vpextrb $0, %xmm3, %eax ; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 ; AVX1-NEXT: je LBB14_18 ; AVX1-NEXT: ## %bb.17: ## %cond.store15 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrw $0, %xmm3, 16(%rdi) +; AVX1-NEXT: vpextrw $0, %xmm0, 16(%rdi) ; AVX1-NEXT: LBB14_18: ## %else16 -; AVX1-NEXT: vpextrb $2, %xmm2, %eax +; AVX1-NEXT: vpextrb $2, %xmm3, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB14_20 ; AVX1-NEXT: ## %bb.19: ## %cond.store17 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrw $1, %xmm2, 18(%rdi) +; AVX1-NEXT: vpextrw $1, %xmm0, 18(%rdi) ; AVX1-NEXT: LBB14_20: ## %else18 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $4, %xmm2, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $4, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB14_22 ; AVX1-NEXT: ## %bb.21: ## %cond.store19 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrw $2, %xmm3, 20(%rdi) +; AVX1-NEXT: vpextrw $2, %xmm0, 20(%rdi) ; AVX1-NEXT: LBB14_22: ## %else20 -; AVX1-NEXT: vpextrb $6, %xmm2, %eax +; AVX1-NEXT: vpextrb $6, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; 
AVX1-NEXT: je LBB14_24 ; AVX1-NEXT: ## %bb.23: ## %cond.store21 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrw $3, %xmm2, 22(%rdi) +; AVX1-NEXT: vpextrw $3, %xmm0, 22(%rdi) ; AVX1-NEXT: LBB14_24: ## %else22 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $8, %xmm2, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $8, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB14_26 ; AVX1-NEXT: ## %bb.25: ## %cond.store23 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrw $4, %xmm3, 24(%rdi) +; AVX1-NEXT: vpextrw $4, %xmm0, 24(%rdi) ; AVX1-NEXT: LBB14_26: ## %else24 -; AVX1-NEXT: vpextrb $10, %xmm2, %eax +; AVX1-NEXT: vpextrb $10, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB14_28 ; AVX1-NEXT: ## %bb.27: ## %cond.store25 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrw $5, %xmm2, 26(%rdi) +; AVX1-NEXT: vpextrw $5, %xmm0, 26(%rdi) ; AVX1-NEXT: LBB14_28: ## %else26 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $12, %xmm0, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $12, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB14_30 ; AVX1-NEXT: ## %bb.29: ## %cond.store27 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrw $6, %xmm2, 28(%rdi) +; AVX1-NEXT: vpextrw $6, %xmm0, 28(%rdi) ; AVX1-NEXT: LBB14_30: ## %else28 -; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: vpextrb $14, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB14_32 ; AVX1-NEXT: ## %bb.31: ## %cond.store29 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 ; AVX1-NEXT: vpextrw $7, %xmm0, 30(%rdi) ; AVX1-NEXT: LBB14_32: ## %else30 ; AVX1-NEXT: vzeroupper @@ -2282,17 +2275,16 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX2-NEXT: vextracti128 
$1, %ymm2, %xmm2 ; AVX2-NEXT: vpextrb $0, %xmm2, %eax ; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX2-NEXT: je LBB14_18 ; AVX2-NEXT: ## %bb.17: ## %cond.store15 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrw $0, %xmm3, 16(%rdi) +; AVX2-NEXT: vpextrw $0, %xmm1, 16(%rdi) ; AVX2-NEXT: LBB14_18: ## %else16 ; AVX2-NEXT: vpextrb $2, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB14_20 ; AVX2-NEXT: ## %bb.19: ## %cond.store17 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrw $1, %xmm2, 18(%rdi) +; AVX2-NEXT: vpextrw $1, %xmm1, 18(%rdi) ; AVX2-NEXT: LBB14_20: ## %else18 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 @@ -2301,15 +2293,13 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB14_22 ; AVX2-NEXT: ## %bb.21: ## %cond.store19 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrw $2, %xmm3, 20(%rdi) +; AVX2-NEXT: vpextrw $2, %xmm1, 20(%rdi) ; AVX2-NEXT: LBB14_22: ## %else20 ; AVX2-NEXT: vpextrb $6, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB14_24 ; AVX2-NEXT: ## %bb.23: ## %cond.store21 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrw $3, %xmm2, 22(%rdi) +; AVX2-NEXT: vpextrw $3, %xmm1, 22(%rdi) ; AVX2-NEXT: LBB14_24: ## %else22 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 @@ -2318,15 +2308,13 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB14_26 ; AVX2-NEXT: ## %bb.25: ## %cond.store23 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrw $4, %xmm3, 24(%rdi) +; AVX2-NEXT: vpextrw $4, %xmm1, 24(%rdi) ; AVX2-NEXT: LBB14_26: ## %else24 ; AVX2-NEXT: vpextrb $10, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB14_28 ; AVX2-NEXT: ## %bb.27: ## %cond.store25 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; 
AVX2-NEXT: vpextrw $5, %xmm2, 26(%rdi) +; AVX2-NEXT: vpextrw $5, %xmm1, 26(%rdi) ; AVX2-NEXT: LBB14_28: ## %else26 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 @@ -2335,15 +2323,13 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB14_30 ; AVX2-NEXT: ## %bb.29: ## %cond.store27 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrw $6, %xmm2, 28(%rdi) +; AVX2-NEXT: vpextrw $6, %xmm1, 28(%rdi) ; AVX2-NEXT: LBB14_30: ## %else28 ; AVX2-NEXT: vpextrb $14, %xmm0, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB14_32 ; AVX2-NEXT: ## %bb.31: ## %cond.store29 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX2-NEXT: vpextrw $7, %xmm0, 30(%rdi) +; AVX2-NEXT: vpextrw $7, %xmm1, 30(%rdi) ; AVX2-NEXT: LBB14_32: ## %else30 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2428,18 +2414,17 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX512F-NEXT: kshiftrw $8, %k0, %k1 ; AVX512F-NEXT: kmovw %k1, %eax ; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX512F-NEXT: je LBB14_18 ; AVX512F-NEXT: ## %bb.17: ## %cond.store15 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrw $0, %xmm2, 16(%rdi) +; AVX512F-NEXT: vpextrw $0, %xmm1, 16(%rdi) ; AVX512F-NEXT: LBB14_18: ## %else16 ; AVX512F-NEXT: kshiftrw $9, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB14_20 ; AVX512F-NEXT: ## %bb.19: ## %cond.store17 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrw $1, %xmm2, 18(%rdi) +; AVX512F-NEXT: vpextrw $1, %xmm1, 18(%rdi) ; AVX512F-NEXT: LBB14_20: ## %else18 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 @@ -2450,16 +2435,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB14_22 ; 
AVX512F-NEXT: ## %bb.21: ## %cond.store19 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrw $2, %xmm2, 20(%rdi) +; AVX512F-NEXT: vpextrw $2, %xmm1, 20(%rdi) ; AVX512F-NEXT: LBB14_22: ## %else20 ; AVX512F-NEXT: kshiftrw $11, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB14_24 ; AVX512F-NEXT: ## %bb.23: ## %cond.store21 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrw $3, %xmm2, 22(%rdi) +; AVX512F-NEXT: vpextrw $3, %xmm1, 22(%rdi) ; AVX512F-NEXT: LBB14_24: ## %else22 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 @@ -2470,16 +2453,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB14_26 ; AVX512F-NEXT: ## %bb.25: ## %cond.store23 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrw $4, %xmm2, 24(%rdi) +; AVX512F-NEXT: vpextrw $4, %xmm1, 24(%rdi) ; AVX512F-NEXT: LBB14_26: ## %else24 ; AVX512F-NEXT: kshiftrw $13, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB14_28 ; AVX512F-NEXT: ## %bb.27: ## %cond.store25 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrw $5, %xmm2, 26(%rdi) +; AVX512F-NEXT: vpextrw $5, %xmm1, 26(%rdi) ; AVX512F-NEXT: LBB14_28: ## %else26 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 @@ -2490,16 +2471,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB14_30 ; AVX512F-NEXT: ## %bb.29: ## %cond.store27 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX512F-NEXT: vpextrw $6, %xmm0, 28(%rdi) +; AVX512F-NEXT: vpextrw $6, %xmm1, 28(%rdi) ; AVX512F-NEXT: LBB14_30: ## %else28 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB14_32 ; AVX512F-NEXT: ## 
%bb.31: ## %cond.store29 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX512F-NEXT: vpextrw $7, %xmm0, 30(%rdi) +; AVX512F-NEXT: vpextrw $7, %xmm1, 30(%rdi) ; AVX512F-NEXT: LBB14_32: ## %else30 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2584,18 +2563,17 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX512VLDQ-NEXT: kshiftrw $8, %k0, %k1 ; AVX512VLDQ-NEXT: kmovw %k1, %eax ; AVX512VLDQ-NEXT: testb $1, %al +; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX512VLDQ-NEXT: je LBB14_18 ; AVX512VLDQ-NEXT: ## %bb.17: ## %cond.store15 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrw $0, %xmm2, 16(%rdi) +; AVX512VLDQ-NEXT: vpextrw $0, %xmm1, 16(%rdi) ; AVX512VLDQ-NEXT: LBB14_18: ## %else16 ; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB14_20 ; AVX512VLDQ-NEXT: ## %bb.19: ## %cond.store17 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrw $1, %xmm2, 18(%rdi) +; AVX512VLDQ-NEXT: vpextrw $1, %xmm1, 18(%rdi) ; AVX512VLDQ-NEXT: LBB14_20: ## %else18 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 @@ -2606,16 +2584,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB14_22 ; AVX512VLDQ-NEXT: ## %bb.21: ## %cond.store19 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrw $2, %xmm2, 20(%rdi) +; AVX512VLDQ-NEXT: vpextrw $2, %xmm1, 20(%rdi) ; AVX512VLDQ-NEXT: LBB14_22: ## %else20 ; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB14_24 ; AVX512VLDQ-NEXT: ## %bb.23: ## %cond.store21 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrw $3, %xmm2, 22(%rdi) +; AVX512VLDQ-NEXT: vpextrw $3, %xmm1, 22(%rdi) ; AVX512VLDQ-NEXT: 
LBB14_24: ## %else22 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 @@ -2626,16 +2602,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB14_26 ; AVX512VLDQ-NEXT: ## %bb.25: ## %cond.store23 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrw $4, %xmm2, 24(%rdi) +; AVX512VLDQ-NEXT: vpextrw $4, %xmm1, 24(%rdi) ; AVX512VLDQ-NEXT: LBB14_26: ## %else24 ; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB14_28 ; AVX512VLDQ-NEXT: ## %bb.27: ## %cond.store25 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrw $5, %xmm2, 26(%rdi) +; AVX512VLDQ-NEXT: vpextrw $5, %xmm1, 26(%rdi) ; AVX512VLDQ-NEXT: LBB14_28: ## %else26 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0 @@ -2646,16 +2620,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB14_30 ; AVX512VLDQ-NEXT: ## %bb.29: ## %cond.store27 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX512VLDQ-NEXT: vpextrw $6, %xmm0, 28(%rdi) +; AVX512VLDQ-NEXT: vpextrw $6, %xmm1, 28(%rdi) ; AVX512VLDQ-NEXT: LBB14_30: ## %else28 ; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB14_32 ; AVX512VLDQ-NEXT: ## %bb.31: ## %cond.store29 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX512VLDQ-NEXT: vpextrw $7, %xmm0, 30(%rdi) +; AVX512VLDQ-NEXT: vpextrw $7, %xmm1, 30(%rdi) ; AVX512VLDQ-NEXT: LBB14_32: ## %else30 ; AVX512VLDQ-NEXT: vzeroupper ; AVX512VLDQ-NEXT: retq @@ -2680,134 +2652,126 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, <16 x i8>* %addr, <16 x i8> % ; SSE2: ## %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb %xmm0, 
%xmm2 -; SSE2-NEXT: movd %xmm2, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: movd %xmm2, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: je LBB15_2 ; SSE2-NEXT: ## %bb.1: ## %cond.store -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: movb %cl, (%rdi) +; SSE2-NEXT: movb %al, (%rdi) ; SSE2-NEXT: LBB15_2: ## %else -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB15_4 ; SSE2-NEXT: ## %bb.3: ## %cond.store1 -; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: movb %ah, 1(%rdi) ; SSE2-NEXT: LBB15_4: ## %else2 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; SSE2-NEXT: movd %xmm2, %eax -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $16, %ecx -; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: movd %xmm2, %ecx +; SSE2-NEXT: movl %ecx, %edx +; SSE2-NEXT: shrl $16, %edx +; SSE2-NEXT: testb $1, %dl ; SSE2-NEXT: je LBB15_6 ; SSE2-NEXT: ## %bb.5: ## %cond.store3 -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: shrl $16, %ecx -; SSE2-NEXT: movb %cl, 2(%rdi) +; SSE2-NEXT: movl %eax, %edx +; SSE2-NEXT: shrl $16, %edx +; SSE2-NEXT: movb %dl, 2(%rdi) ; SSE2-NEXT: LBB15_6: ## %else4 -; SSE2-NEXT: shrl $24, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $24, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB15_8 ; SSE2-NEXT: ## %bb.7: ## %cond.store5 -; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: shrl $24, %eax ; SSE2-NEXT: movb %al, 3(%rdi) ; SSE2-NEXT: LBB15_8: ## %else6 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; SSE2-NEXT: pextrw $2, %xmm2, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $2, %xmm2, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $2, %xmm1, %eax ; SSE2-NEXT: je LBB15_10 ; SSE2-NEXT: ## %bb.9: ## %cond.store7 -; SSE2-NEXT: pextrw $2, %xmm1, %ecx -; SSE2-NEXT: movb %cl, 4(%rdi) +; SSE2-NEXT: movb %al, 4(%rdi) ; SSE2-NEXT: LBB15_10: ## %else8 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, 
%ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB15_12 ; SSE2-NEXT: ## %bb.11: ## %cond.store9 -; SSE2-NEXT: pextrw $2, %xmm1, %eax ; SSE2-NEXT: movb %ah, 5(%rdi) ; SSE2-NEXT: LBB15_12: ## %else10 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; SSE2-NEXT: pextrw $3, %xmm2, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $3, %xmm2, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $3, %xmm1, %eax ; SSE2-NEXT: je LBB15_14 ; SSE2-NEXT: ## %bb.13: ## %cond.store11 -; SSE2-NEXT: pextrw $3, %xmm1, %ecx -; SSE2-NEXT: movb %cl, 6(%rdi) +; SSE2-NEXT: movb %al, 6(%rdi) ; SSE2-NEXT: LBB15_14: ## %else12 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB15_16 ; SSE2-NEXT: ## %bb.15: ## %cond.store13 -; SSE2-NEXT: pextrw $3, %xmm1, %eax ; SSE2-NEXT: movb %ah, 7(%rdi) ; SSE2-NEXT: LBB15_16: ## %else14 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; SSE2-NEXT: pextrw $4, %xmm2, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $4, %xmm2, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $4, %xmm1, %eax ; SSE2-NEXT: je LBB15_18 ; SSE2-NEXT: ## %bb.17: ## %cond.store15 -; SSE2-NEXT: pextrw $4, %xmm1, %ecx -; SSE2-NEXT: movb %cl, 8(%rdi) +; SSE2-NEXT: movb %al, 8(%rdi) ; SSE2-NEXT: LBB15_18: ## %else16 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB15_20 ; SSE2-NEXT: ## %bb.19: ## %cond.store17 -; SSE2-NEXT: pextrw $4, %xmm1, %eax ; SSE2-NEXT: movb %ah, 9(%rdi) ; SSE2-NEXT: LBB15_20: ## %else18 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; SSE2-NEXT: pextrw $5, %xmm2, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $5, %xmm2, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $5, %xmm1, %eax ; SSE2-NEXT: je LBB15_22 ; SSE2-NEXT: ## %bb.21: ## %cond.store19 -; SSE2-NEXT: pextrw $5, %xmm1, %ecx -; SSE2-NEXT: movb %cl, 10(%rdi) +; 
SSE2-NEXT: movb %al, 10(%rdi) ; SSE2-NEXT: LBB15_22: ## %else20 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB15_24 ; SSE2-NEXT: ## %bb.23: ## %cond.store21 -; SSE2-NEXT: pextrw $5, %xmm1, %eax ; SSE2-NEXT: movb %ah, 11(%rdi) ; SSE2-NEXT: LBB15_24: ## %else22 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm2 -; SSE2-NEXT: pextrw $6, %xmm2, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $6, %xmm2, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $6, %xmm1, %eax ; SSE2-NEXT: je LBB15_26 ; SSE2-NEXT: ## %bb.25: ## %cond.store23 -; SSE2-NEXT: pextrw $6, %xmm1, %ecx -; SSE2-NEXT: movb %cl, 12(%rdi) +; SSE2-NEXT: movb %al, 12(%rdi) ; SSE2-NEXT: LBB15_26: ## %else24 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB15_28 ; SSE2-NEXT: ## %bb.27: ## %cond.store25 -; SSE2-NEXT: pextrw $6, %xmm1, %eax ; SSE2-NEXT: movb %ah, 13(%rdi) ; SSE2-NEXT: LBB15_28: ## %else26 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqb %xmm2, %xmm0 -; SSE2-NEXT: pextrw $7, %xmm0, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $7, %xmm0, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $7, %xmm1, %eax ; SSE2-NEXT: je LBB15_30 ; SSE2-NEXT: ## %bb.29: ## %cond.store27 -; SSE2-NEXT: pextrw $7, %xmm1, %ecx -; SSE2-NEXT: movb %cl, 14(%rdi) +; SSE2-NEXT: movb %al, 14(%rdi) ; SSE2-NEXT: LBB15_30: ## %else28 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB15_32 ; SSE2-NEXT: ## %bb.31: ## %cond.store29 -; SSE2-NEXT: pextrw $7, %xmm1, %eax ; SSE2-NEXT: movb %ah, 15(%rdi) ; SSE2-NEXT: LBB15_32: ## %else30 ; SSE2-NEXT: retq @@ -3355,266 +3319,250 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; SSE2: ## %bb.0: ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; SSE2-NEXT: 
movd %xmm4, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: movd %xmm4, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: movd %xmm2, %eax ; SSE2-NEXT: je LBB16_2 ; SSE2-NEXT: ## %bb.1: ## %cond.store -; SSE2-NEXT: movd %xmm2, %ecx -; SSE2-NEXT: movb %cl, (%rdi) +; SSE2-NEXT: movb %al, (%rdi) ; SSE2-NEXT: LBB16_2: ## %else -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_4 ; SSE2-NEXT: ## %bb.3: ## %cond.store1 -; SSE2-NEXT: movd %xmm2, %eax ; SSE2-NEXT: movb %ah, 1(%rdi) ; SSE2-NEXT: LBB16_4: ## %else2 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; SSE2-NEXT: movd %xmm4, %eax -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $16, %ecx -; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: movd %xmm4, %ecx +; SSE2-NEXT: movl %ecx, %edx +; SSE2-NEXT: shrl $16, %edx +; SSE2-NEXT: testb $1, %dl ; SSE2-NEXT: je LBB16_6 ; SSE2-NEXT: ## %bb.5: ## %cond.store3 -; SSE2-NEXT: movd %xmm2, %ecx -; SSE2-NEXT: shrl $16, %ecx -; SSE2-NEXT: movb %cl, 2(%rdi) +; SSE2-NEXT: movl %eax, %edx +; SSE2-NEXT: shrl $16, %edx +; SSE2-NEXT: movb %dl, 2(%rdi) ; SSE2-NEXT: LBB16_6: ## %else4 -; SSE2-NEXT: shrl $24, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $24, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_8 ; SSE2-NEXT: ## %bb.7: ## %cond.store5 -; SSE2-NEXT: movd %xmm2, %eax ; SSE2-NEXT: shrl $24, %eax ; SSE2-NEXT: movb %al, 3(%rdi) ; SSE2-NEXT: LBB16_8: ## %else6 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; SSE2-NEXT: pextrw $2, %xmm4, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $2, %xmm4, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $2, %xmm2, %eax ; SSE2-NEXT: je LBB16_10 ; SSE2-NEXT: ## %bb.9: ## %cond.store7 -; SSE2-NEXT: pextrw $2, %xmm2, %ecx -; SSE2-NEXT: movb %cl, 4(%rdi) +; SSE2-NEXT: movb %al, 4(%rdi) ; SSE2-NEXT: LBB16_10: ## %else8 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: 
testb $1, %cl ; SSE2-NEXT: je LBB16_12 ; SSE2-NEXT: ## %bb.11: ## %cond.store9 -; SSE2-NEXT: pextrw $2, %xmm2, %eax ; SSE2-NEXT: movb %ah, 5(%rdi) ; SSE2-NEXT: LBB16_12: ## %else10 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; SSE2-NEXT: pextrw $3, %xmm4, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $3, %xmm4, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $3, %xmm2, %eax ; SSE2-NEXT: je LBB16_14 ; SSE2-NEXT: ## %bb.13: ## %cond.store11 -; SSE2-NEXT: pextrw $3, %xmm2, %ecx -; SSE2-NEXT: movb %cl, 6(%rdi) +; SSE2-NEXT: movb %al, 6(%rdi) ; SSE2-NEXT: LBB16_14: ## %else12 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_16 ; SSE2-NEXT: ## %bb.15: ## %cond.store13 -; SSE2-NEXT: pextrw $3, %xmm2, %eax ; SSE2-NEXT: movb %ah, 7(%rdi) ; SSE2-NEXT: LBB16_16: ## %else14 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; SSE2-NEXT: pextrw $4, %xmm4, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $4, %xmm4, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $4, %xmm2, %eax ; SSE2-NEXT: je LBB16_18 ; SSE2-NEXT: ## %bb.17: ## %cond.store15 -; SSE2-NEXT: pextrw $4, %xmm2, %ecx -; SSE2-NEXT: movb %cl, 8(%rdi) +; SSE2-NEXT: movb %al, 8(%rdi) ; SSE2-NEXT: LBB16_18: ## %else16 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_20 ; SSE2-NEXT: ## %bb.19: ## %cond.store17 -; SSE2-NEXT: pextrw $4, %xmm2, %eax ; SSE2-NEXT: movb %ah, 9(%rdi) ; SSE2-NEXT: LBB16_20: ## %else18 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; SSE2-NEXT: pextrw $5, %xmm4, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $5, %xmm4, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $5, %xmm2, %eax ; SSE2-NEXT: je LBB16_22 ; SSE2-NEXT: ## %bb.21: ## %cond.store19 -; SSE2-NEXT: pextrw $5, %xmm2, %ecx -; SSE2-NEXT: movb %cl, 10(%rdi) +; SSE2-NEXT: movb %al, 
10(%rdi) ; SSE2-NEXT: LBB16_22: ## %else20 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_24 ; SSE2-NEXT: ## %bb.23: ## %cond.store21 -; SSE2-NEXT: pextrw $5, %xmm2, %eax ; SSE2-NEXT: movb %ah, 11(%rdi) ; SSE2-NEXT: LBB16_24: ## %else22 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm4 -; SSE2-NEXT: pextrw $6, %xmm4, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $6, %xmm4, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $6, %xmm2, %eax ; SSE2-NEXT: je LBB16_26 ; SSE2-NEXT: ## %bb.25: ## %cond.store23 -; SSE2-NEXT: pextrw $6, %xmm2, %ecx -; SSE2-NEXT: movb %cl, 12(%rdi) +; SSE2-NEXT: movb %al, 12(%rdi) ; SSE2-NEXT: LBB16_26: ## %else24 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_28 ; SSE2-NEXT: ## %bb.27: ## %cond.store25 -; SSE2-NEXT: pextrw $6, %xmm2, %eax ; SSE2-NEXT: movb %ah, 13(%rdi) ; SSE2-NEXT: LBB16_28: ## %else26 ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqb %xmm4, %xmm0 -; SSE2-NEXT: pextrw $7, %xmm0, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $7, %xmm0, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $7, %xmm2, %eax ; SSE2-NEXT: je LBB16_30 ; SSE2-NEXT: ## %bb.29: ## %cond.store27 -; SSE2-NEXT: pextrw $7, %xmm2, %ecx -; SSE2-NEXT: movb %cl, 14(%rdi) +; SSE2-NEXT: movb %al, 14(%rdi) ; SSE2-NEXT: LBB16_30: ## %else28 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_32 ; SSE2-NEXT: ## %bb.31: ## %cond.store29 -; SSE2-NEXT: pextrw $7, %xmm2, %eax ; SSE2-NEXT: movb %ah, 15(%rdi) ; SSE2-NEXT: LBB16_32: ## %else30 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: movd %xmm0, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: movd %xmm3, %eax ; SSE2-NEXT: je LBB16_34 ; 
SSE2-NEXT: ## %bb.33: ## %cond.store31 -; SSE2-NEXT: movd %xmm3, %ecx -; SSE2-NEXT: movb %cl, 16(%rdi) +; SSE2-NEXT: movb %al, 16(%rdi) ; SSE2-NEXT: LBB16_34: ## %else32 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_36 ; SSE2-NEXT: ## %bb.35: ## %cond.store33 -; SSE2-NEXT: movd %xmm3, %eax ; SSE2-NEXT: movb %ah, 17(%rdi) ; SSE2-NEXT: LBB16_36: ## %else34 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: movl %eax, %ecx -; SSE2-NEXT: shrl $16, %ecx -; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: movd %xmm0, %ecx +; SSE2-NEXT: movl %ecx, %edx +; SSE2-NEXT: shrl $16, %edx +; SSE2-NEXT: testb $1, %dl ; SSE2-NEXT: je LBB16_38 ; SSE2-NEXT: ## %bb.37: ## %cond.store35 -; SSE2-NEXT: movd %xmm3, %ecx -; SSE2-NEXT: shrl $16, %ecx -; SSE2-NEXT: movb %cl, 18(%rdi) +; SSE2-NEXT: movl %eax, %edx +; SSE2-NEXT: shrl $16, %edx +; SSE2-NEXT: movb %dl, 18(%rdi) ; SSE2-NEXT: LBB16_38: ## %else36 -; SSE2-NEXT: shrl $24, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $24, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_40 ; SSE2-NEXT: ## %bb.39: ## %cond.store37 -; SSE2-NEXT: movd %xmm3, %eax ; SSE2-NEXT: shrl $24, %eax ; SSE2-NEXT: movb %al, 19(%rdi) ; SSE2-NEXT: LBB16_40: ## %else38 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: pextrw $2, %xmm0, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $2, %xmm0, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $2, %xmm3, %eax ; SSE2-NEXT: je LBB16_42 ; SSE2-NEXT: ## %bb.41: ## %cond.store39 -; SSE2-NEXT: pextrw $2, %xmm3, %ecx -; SSE2-NEXT: movb %cl, 20(%rdi) +; SSE2-NEXT: movb %al, 20(%rdi) ; SSE2-NEXT: LBB16_42: ## %else40 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_44 ; SSE2-NEXT: ## %bb.43: ## %cond.store41 -; SSE2-NEXT: pextrw $2, %xmm3, %eax ; SSE2-NEXT: 
movb %ah, 21(%rdi) ; SSE2-NEXT: LBB16_44: ## %else42 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: pextrw $3, %xmm0, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $3, %xmm0, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $3, %xmm3, %eax ; SSE2-NEXT: je LBB16_46 ; SSE2-NEXT: ## %bb.45: ## %cond.store43 -; SSE2-NEXT: pextrw $3, %xmm3, %ecx -; SSE2-NEXT: movb %cl, 22(%rdi) +; SSE2-NEXT: movb %al, 22(%rdi) ; SSE2-NEXT: LBB16_46: ## %else44 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_48 ; SSE2-NEXT: ## %bb.47: ## %cond.store45 -; SSE2-NEXT: pextrw $3, %xmm3, %eax ; SSE2-NEXT: movb %ah, 23(%rdi) ; SSE2-NEXT: LBB16_48: ## %else46 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: pextrw $4, %xmm0, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $4, %xmm0, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $4, %xmm3, %eax ; SSE2-NEXT: je LBB16_50 ; SSE2-NEXT: ## %bb.49: ## %cond.store47 -; SSE2-NEXT: pextrw $4, %xmm3, %ecx -; SSE2-NEXT: movb %cl, 24(%rdi) +; SSE2-NEXT: movb %al, 24(%rdi) ; SSE2-NEXT: LBB16_50: ## %else48 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_52 ; SSE2-NEXT: ## %bb.51: ## %cond.store49 -; SSE2-NEXT: pextrw $4, %xmm3, %eax ; SSE2-NEXT: movb %ah, 25(%rdi) ; SSE2-NEXT: LBB16_52: ## %else50 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: pextrw $5, %xmm0, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $5, %xmm0, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $5, %xmm3, %eax ; SSE2-NEXT: je LBB16_54 ; SSE2-NEXT: ## %bb.53: ## %cond.store51 -; SSE2-NEXT: pextrw $5, %xmm3, %ecx -; SSE2-NEXT: movb %cl, 26(%rdi) +; SSE2-NEXT: movb %al, 26(%rdi) ; SSE2-NEXT: LBB16_54: ## %else52 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, 
%ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_56 ; SSE2-NEXT: ## %bb.55: ## %cond.store53 -; SSE2-NEXT: pextrw $5, %xmm3, %eax ; SSE2-NEXT: movb %ah, 27(%rdi) ; SSE2-NEXT: LBB16_56: ## %else54 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 -; SSE2-NEXT: pextrw $6, %xmm0, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $6, %xmm0, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $6, %xmm3, %eax ; SSE2-NEXT: je LBB16_58 ; SSE2-NEXT: ## %bb.57: ## %cond.store55 -; SSE2-NEXT: pextrw $6, %xmm3, %ecx -; SSE2-NEXT: movb %cl, 28(%rdi) +; SSE2-NEXT: movb %al, 28(%rdi) ; SSE2-NEXT: LBB16_58: ## %else56 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_60 ; SSE2-NEXT: ## %bb.59: ## %cond.store57 -; SSE2-NEXT: pextrw $6, %xmm3, %eax ; SSE2-NEXT: movb %ah, 29(%rdi) ; SSE2-NEXT: LBB16_60: ## %else58 ; SSE2-NEXT: pxor %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE2-NEXT: pextrw $7, %xmm1, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: pextrw $7, %xmm1, %ecx +; SSE2-NEXT: testb $1, %cl +; SSE2-NEXT: pextrw $7, %xmm3, %eax ; SSE2-NEXT: je LBB16_62 ; SSE2-NEXT: ## %bb.61: ## %cond.store59 -; SSE2-NEXT: pextrw $7, %xmm3, %ecx -; SSE2-NEXT: movb %cl, 30(%rdi) +; SSE2-NEXT: movb %al, 30(%rdi) ; SSE2-NEXT: LBB16_62: ## %else60 -; SSE2-NEXT: shrl $8, %eax -; SSE2-NEXT: testb $1, %al +; SSE2-NEXT: shrl $8, %ecx +; SSE2-NEXT: testb $1, %cl ; SSE2-NEXT: je LBB16_64 ; SSE2-NEXT: ## %bb.63: ## %cond.store61 -; SSE2-NEXT: pextrw $7, %xmm3, %eax ; SSE2-NEXT: movb %ah, 31(%rdi) ; SSE2-NEXT: LBB16_64: ## %else62 ; SSE2-NEXT: retq @@ -3969,133 +3917,118 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX1-NEXT: ## %bb.31: ## %cond.store29 ; AVX1-NEXT: vpextrb $15, %xmm1, 15(%rdi) ; AVX1-NEXT: LBB16_32: ## %else30 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, 
%xmm2 -; AVX1-NEXT: vpextrb $0, %xmm2, %eax +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vpcmpeqb %xmm0, %xmm2, %xmm3 +; AVX1-NEXT: vpextrb $0, %xmm3, %eax ; AVX1-NEXT: testb $1, %al +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 ; AVX1-NEXT: je LBB16_34 ; AVX1-NEXT: ## %bb.33: ## %cond.store31 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrb $0, %xmm3, 16(%rdi) +; AVX1-NEXT: vpextrb $0, %xmm0, 16(%rdi) ; AVX1-NEXT: LBB16_34: ## %else32 -; AVX1-NEXT: vpextrb $1, %xmm2, %eax +; AVX1-NEXT: vpextrb $1, %xmm3, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_36 ; AVX1-NEXT: ## %bb.35: ## %cond.store33 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrb $1, %xmm2, 17(%rdi) +; AVX1-NEXT: vpextrb $1, %xmm0, 17(%rdi) ; AVX1-NEXT: LBB16_36: ## %else34 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $2, %xmm2, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $2, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_38 ; AVX1-NEXT: ## %bb.37: ## %cond.store35 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrb $2, %xmm3, 18(%rdi) +; AVX1-NEXT: vpextrb $2, %xmm0, 18(%rdi) ; AVX1-NEXT: LBB16_38: ## %else36 -; AVX1-NEXT: vpextrb $3, %xmm2, %eax +; AVX1-NEXT: vpextrb $3, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_40 ; AVX1-NEXT: ## %bb.39: ## %cond.store37 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrb $3, %xmm2, 19(%rdi) +; AVX1-NEXT: vpextrb $3, %xmm0, 19(%rdi) ; AVX1-NEXT: LBB16_40: ## %else38 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $4, %xmm2, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $4, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_42 ; AVX1-NEXT: ## %bb.41: ## 
%cond.store39 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrb $4, %xmm3, 20(%rdi) +; AVX1-NEXT: vpextrb $4, %xmm0, 20(%rdi) ; AVX1-NEXT: LBB16_42: ## %else40 -; AVX1-NEXT: vpextrb $5, %xmm2, %eax +; AVX1-NEXT: vpextrb $5, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_44 ; AVX1-NEXT: ## %bb.43: ## %cond.store41 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrb $5, %xmm2, 21(%rdi) +; AVX1-NEXT: vpextrb $5, %xmm0, 21(%rdi) ; AVX1-NEXT: LBB16_44: ## %else42 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $6, %xmm2, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $6, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_46 ; AVX1-NEXT: ## %bb.45: ## %cond.store43 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrb $6, %xmm3, 22(%rdi) +; AVX1-NEXT: vpextrb $6, %xmm0, 22(%rdi) ; AVX1-NEXT: LBB16_46: ## %else44 -; AVX1-NEXT: vpextrb $7, %xmm2, %eax +; AVX1-NEXT: vpextrb $7, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_48 ; AVX1-NEXT: ## %bb.47: ## %cond.store45 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrb $7, %xmm2, 23(%rdi) +; AVX1-NEXT: vpextrb $7, %xmm0, 23(%rdi) ; AVX1-NEXT: LBB16_48: ## %else46 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $8, %xmm2, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $8, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_50 ; AVX1-NEXT: ## %bb.49: ## %cond.store47 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrb $8, %xmm3, 24(%rdi) +; AVX1-NEXT: vpextrb $8, %xmm0, 24(%rdi) ; AVX1-NEXT: LBB16_50: ## %else48 -; AVX1-NEXT: vpextrb $9, %xmm2, %eax +; AVX1-NEXT: vpextrb $9, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_52 ; AVX1-NEXT: ## %bb.51: ## 
%cond.store49 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrb $9, %xmm2, 25(%rdi) +; AVX1-NEXT: vpextrb $9, %xmm0, 25(%rdi) ; AVX1-NEXT: LBB16_52: ## %else50 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $10, %xmm2, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $10, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_54 ; AVX1-NEXT: ## %bb.53: ## %cond.store51 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrb $10, %xmm3, 26(%rdi) +; AVX1-NEXT: vpextrb $10, %xmm0, 26(%rdi) ; AVX1-NEXT: LBB16_54: ## %else52 -; AVX1-NEXT: vpextrb $11, %xmm2, %eax +; AVX1-NEXT: vpextrb $11, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_56 ; AVX1-NEXT: ## %bb.55: ## %cond.store53 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrb $11, %xmm2, 27(%rdi) +; AVX1-NEXT: vpextrb $11, %xmm0, 27(%rdi) ; AVX1-NEXT: LBB16_56: ## %else54 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpextrb $12, %xmm2, %eax +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $12, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_58 ; AVX1-NEXT: ## %bb.57: ## %cond.store55 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpextrb $12, %xmm3, 28(%rdi) +; AVX1-NEXT: vpextrb $12, %xmm0, 28(%rdi) ; AVX1-NEXT: LBB16_58: ## %else56 -; AVX1-NEXT: vpextrb $13, %xmm2, %eax +; AVX1-NEXT: vpextrb $13, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_60 ; AVX1-NEXT: ## %bb.59: ## %cond.store57 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrb $13, %xmm2, 29(%rdi) +; AVX1-NEXT: vpextrb $13, %xmm0, 29(%rdi) ; AVX1-NEXT: LBB16_60: ## %else58 -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpextrb $14, %xmm0, %eax +; AVX1-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 +; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vpextrb $14, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_62 ; AVX1-NEXT: ## %bb.61: ## %cond.store59 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 -; AVX1-NEXT: vpextrb $14, %xmm2, 30(%rdi) +; AVX1-NEXT: vpextrb $14, %xmm0, 30(%rdi) ; AVX1-NEXT: LBB16_62: ## %else60 -; AVX1-NEXT: vpextrb $15, %xmm0, %eax +; AVX1-NEXT: vpextrb $15, %xmm1, %eax ; AVX1-NEXT: testb $1, %al ; AVX1-NEXT: je LBB16_64 ; AVX1-NEXT: ## %bb.63: ## %cond.store61 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0 ; AVX1-NEXT: vpextrb $15, %xmm0, 31(%rdi) ; AVX1-NEXT: LBB16_64: ## %else62 ; AVX1-NEXT: vzeroupper @@ -4220,17 +4153,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX2-NEXT: vpextrb $0, %xmm2, %eax ; AVX2-NEXT: testb $1, %al +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX2-NEXT: je LBB16_34 ; AVX2-NEXT: ## %bb.33: ## %cond.store31 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrb $0, %xmm3, 16(%rdi) +; AVX2-NEXT: vpextrb $0, %xmm1, 16(%rdi) ; AVX2-NEXT: LBB16_34: ## %else32 ; AVX2-NEXT: vpextrb $1, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_36 ; AVX2-NEXT: ## %bb.35: ## %cond.store33 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $1, %xmm2, 17(%rdi) +; AVX2-NEXT: vpextrb $1, %xmm1, 17(%rdi) ; AVX2-NEXT: LBB16_36: ## %else34 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4239,15 +4171,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_38 ; AVX2-NEXT: ## %bb.37: ## %cond.store35 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrb $2, %xmm3, 18(%rdi) +; AVX2-NEXT: vpextrb $2, %xmm1, 18(%rdi) ; AVX2-NEXT: LBB16_38: ## %else36 ; AVX2-NEXT: vpextrb $3, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_40 ; AVX2-NEXT: ## %bb.39: ## 
%cond.store37 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $3, %xmm2, 19(%rdi) +; AVX2-NEXT: vpextrb $3, %xmm1, 19(%rdi) ; AVX2-NEXT: LBB16_40: ## %else38 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4256,15 +4186,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_42 ; AVX2-NEXT: ## %bb.41: ## %cond.store39 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrb $4, %xmm3, 20(%rdi) +; AVX2-NEXT: vpextrb $4, %xmm1, 20(%rdi) ; AVX2-NEXT: LBB16_42: ## %else40 ; AVX2-NEXT: vpextrb $5, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_44 ; AVX2-NEXT: ## %bb.43: ## %cond.store41 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $5, %xmm2, 21(%rdi) +; AVX2-NEXT: vpextrb $5, %xmm1, 21(%rdi) ; AVX2-NEXT: LBB16_44: ## %else42 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4273,15 +4201,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_46 ; AVX2-NEXT: ## %bb.45: ## %cond.store43 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrb $6, %xmm3, 22(%rdi) +; AVX2-NEXT: vpextrb $6, %xmm1, 22(%rdi) ; AVX2-NEXT: LBB16_46: ## %else44 ; AVX2-NEXT: vpextrb $7, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_48 ; AVX2-NEXT: ## %bb.47: ## %cond.store45 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $7, %xmm2, 23(%rdi) +; AVX2-NEXT: vpextrb $7, %xmm1, 23(%rdi) ; AVX2-NEXT: LBB16_48: ## %else46 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4290,15 +4216,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_50 ; AVX2-NEXT: ## %bb.49: ## %cond.store47 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrb $8, %xmm3, 
24(%rdi) +; AVX2-NEXT: vpextrb $8, %xmm1, 24(%rdi) ; AVX2-NEXT: LBB16_50: ## %else48 ; AVX2-NEXT: vpextrb $9, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_52 ; AVX2-NEXT: ## %bb.51: ## %cond.store49 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $9, %xmm2, 25(%rdi) +; AVX2-NEXT: vpextrb $9, %xmm1, 25(%rdi) ; AVX2-NEXT: LBB16_52: ## %else50 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4307,15 +4231,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_54 ; AVX2-NEXT: ## %bb.53: ## %cond.store51 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrb $10, %xmm3, 26(%rdi) +; AVX2-NEXT: vpextrb $10, %xmm1, 26(%rdi) ; AVX2-NEXT: LBB16_54: ## %else52 ; AVX2-NEXT: vpextrb $11, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_56 ; AVX2-NEXT: ## %bb.55: ## %cond.store53 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $11, %xmm2, 27(%rdi) +; AVX2-NEXT: vpextrb $11, %xmm1, 27(%rdi) ; AVX2-NEXT: LBB16_56: ## %else54 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4324,15 +4246,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_58 ; AVX2-NEXT: ## %bb.57: ## %cond.store55 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 -; AVX2-NEXT: vpextrb $12, %xmm3, 28(%rdi) +; AVX2-NEXT: vpextrb $12, %xmm1, 28(%rdi) ; AVX2-NEXT: LBB16_58: ## %else56 ; AVX2-NEXT: vpextrb $13, %xmm2, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_60 ; AVX2-NEXT: ## %bb.59: ## %cond.store57 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $13, %xmm2, 29(%rdi) +; AVX2-NEXT: vpextrb $13, %xmm1, 29(%rdi) ; AVX2-NEXT: LBB16_60: ## %else58 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 @@ -4341,15 +4261,13 @@ define void @store_v32i8_v32i8(<32 x i8> 
%trigger, <32 x i8>* %addr, <32 x i8> % ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_62 ; AVX2-NEXT: ## %bb.61: ## %cond.store59 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX2-NEXT: vpextrb $14, %xmm2, 30(%rdi) +; AVX2-NEXT: vpextrb $14, %xmm1, 30(%rdi) ; AVX2-NEXT: LBB16_62: ## %else60 ; AVX2-NEXT: vpextrb $15, %xmm0, %eax ; AVX2-NEXT: testb $1, %al ; AVX2-NEXT: je LBB16_64 ; AVX2-NEXT: ## %bb.63: ## %cond.store61 -; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX2-NEXT: vpextrb $15, %xmm0, 31(%rdi) +; AVX2-NEXT: vpextrb $15, %xmm1, 31(%rdi) ; AVX2-NEXT: LBB16_64: ## %else62 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -4522,18 +4440,17 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al +; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX512F-NEXT: je LBB16_34 ; AVX512F-NEXT: ## %bb.33: ## %cond.store31 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $0, %xmm2, 16(%rdi) +; AVX512F-NEXT: vpextrb $0, %xmm1, 16(%rdi) ; AVX512F-NEXT: LBB16_34: ## %else32 ; AVX512F-NEXT: kshiftrw $1, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_36 ; AVX512F-NEXT: ## %bb.35: ## %cond.store33 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $1, %xmm2, 17(%rdi) +; AVX512F-NEXT: vpextrb $1, %xmm1, 17(%rdi) ; AVX512F-NEXT: LBB16_36: ## %else34 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4545,16 +4462,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_38 ; AVX512F-NEXT: ## %bb.37: ## %cond.store35 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $2, %xmm2, 18(%rdi) +; AVX512F-NEXT: vpextrb $2, %xmm1, 18(%rdi) ; AVX512F-NEXT: LBB16_38: ## %else36 ; AVX512F-NEXT: kshiftrw $3, %k0, %k0 ; 
AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_40 ; AVX512F-NEXT: ## %bb.39: ## %cond.store37 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $3, %xmm2, 19(%rdi) +; AVX512F-NEXT: vpextrb $3, %xmm1, 19(%rdi) ; AVX512F-NEXT: LBB16_40: ## %else38 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4566,16 +4481,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_42 ; AVX512F-NEXT: ## %bb.41: ## %cond.store39 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $4, %xmm2, 20(%rdi) +; AVX512F-NEXT: vpextrb $4, %xmm1, 20(%rdi) ; AVX512F-NEXT: LBB16_42: ## %else40 ; AVX512F-NEXT: kshiftrw $5, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_44 ; AVX512F-NEXT: ## %bb.43: ## %cond.store41 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $5, %xmm2, 21(%rdi) +; AVX512F-NEXT: vpextrb $5, %xmm1, 21(%rdi) ; AVX512F-NEXT: LBB16_44: ## %else42 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4587,16 +4500,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_46 ; AVX512F-NEXT: ## %bb.45: ## %cond.store43 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $6, %xmm2, 22(%rdi) +; AVX512F-NEXT: vpextrb $6, %xmm1, 22(%rdi) ; AVX512F-NEXT: LBB16_46: ## %else44 ; AVX512F-NEXT: kshiftrw $7, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_48 ; AVX512F-NEXT: ## %bb.47: ## %cond.store45 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $7, %xmm2, 23(%rdi) +; AVX512F-NEXT: vpextrb $7, %xmm1, 23(%rdi) ; AVX512F-NEXT: LBB16_48: ## %else46 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: 
vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4608,16 +4519,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_50 ; AVX512F-NEXT: ## %bb.49: ## %cond.store47 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $8, %xmm2, 24(%rdi) +; AVX512F-NEXT: vpextrb $8, %xmm1, 24(%rdi) ; AVX512F-NEXT: LBB16_50: ## %else48 ; AVX512F-NEXT: kshiftrw $9, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_52 ; AVX512F-NEXT: ## %bb.51: ## %cond.store49 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $9, %xmm2, 25(%rdi) +; AVX512F-NEXT: vpextrb $9, %xmm1, 25(%rdi) ; AVX512F-NEXT: LBB16_52: ## %else50 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4629,16 +4538,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_54 ; AVX512F-NEXT: ## %bb.53: ## %cond.store51 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $10, %xmm2, 26(%rdi) +; AVX512F-NEXT: vpextrb $10, %xmm1, 26(%rdi) ; AVX512F-NEXT: LBB16_54: ## %else52 ; AVX512F-NEXT: kshiftrw $11, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_56 ; AVX512F-NEXT: ## %bb.55: ## %cond.store53 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $11, %xmm2, 27(%rdi) +; AVX512F-NEXT: vpextrb $11, %xmm1, 27(%rdi) ; AVX512F-NEXT: LBB16_56: ## %else54 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4650,16 +4557,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_58 ; AVX512F-NEXT: ## %bb.57: ## %cond.store55 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $12, %xmm2, 28(%rdi) +; AVX512F-NEXT: vpextrb $12, %xmm1, 
28(%rdi) ; AVX512F-NEXT: LBB16_58: ## %else56 ; AVX512F-NEXT: kshiftrw $13, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_60 ; AVX512F-NEXT: ## %bb.59: ## %cond.store57 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512F-NEXT: vpextrb $13, %xmm2, 29(%rdi) +; AVX512F-NEXT: vpextrb $13, %xmm1, 29(%rdi) ; AVX512F-NEXT: LBB16_60: ## %else58 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 @@ -4671,16 +4576,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_62 ; AVX512F-NEXT: ## %bb.61: ## %cond.store59 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX512F-NEXT: vpextrb $14, %xmm0, 30(%rdi) +; AVX512F-NEXT: vpextrb $14, %xmm1, 30(%rdi) ; AVX512F-NEXT: LBB16_62: ## %else60 ; AVX512F-NEXT: kshiftrw $15, %k0, %k0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: je LBB16_64 ; AVX512F-NEXT: ## %bb.63: ## %cond.store61 -; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX512F-NEXT: vpextrb $15, %xmm0, 31(%rdi) +; AVX512F-NEXT: vpextrb $15, %xmm1, 31(%rdi) ; AVX512F-NEXT: LBB16_64: ## %else62 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -4853,18 +4756,17 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al +; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm1 ; AVX512VLDQ-NEXT: je LBB16_34 ; AVX512VLDQ-NEXT: ## %bb.33: ## %cond.store31 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $0, %xmm2, 16(%rdi) +; AVX512VLDQ-NEXT: vpextrb $0, %xmm1, 16(%rdi) ; AVX512VLDQ-NEXT: LBB16_34: ## %else32 ; AVX512VLDQ-NEXT: kshiftrw $1, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_36 ; AVX512VLDQ-NEXT: ## %bb.35: ## %cond.store33 -; AVX512VLDQ-NEXT: 
vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $1, %xmm2, 17(%rdi) +; AVX512VLDQ-NEXT: vpextrb $1, %xmm1, 17(%rdi) ; AVX512VLDQ-NEXT: LBB16_36: ## %else34 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4876,16 +4778,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_38 ; AVX512VLDQ-NEXT: ## %bb.37: ## %cond.store35 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $2, %xmm2, 18(%rdi) +; AVX512VLDQ-NEXT: vpextrb $2, %xmm1, 18(%rdi) ; AVX512VLDQ-NEXT: LBB16_38: ## %else36 ; AVX512VLDQ-NEXT: kshiftrw $3, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_40 ; AVX512VLDQ-NEXT: ## %bb.39: ## %cond.store37 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $3, %xmm2, 19(%rdi) +; AVX512VLDQ-NEXT: vpextrb $3, %xmm1, 19(%rdi) ; AVX512VLDQ-NEXT: LBB16_40: ## %else38 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4897,16 +4797,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_42 ; AVX512VLDQ-NEXT: ## %bb.41: ## %cond.store39 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $4, %xmm2, 20(%rdi) +; AVX512VLDQ-NEXT: vpextrb $4, %xmm1, 20(%rdi) ; AVX512VLDQ-NEXT: LBB16_42: ## %else40 ; AVX512VLDQ-NEXT: kshiftrw $5, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_44 ; AVX512VLDQ-NEXT: ## %bb.43: ## %cond.store41 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $5, %xmm2, 21(%rdi) +; AVX512VLDQ-NEXT: vpextrb $5, %xmm1, 21(%rdi) ; AVX512VLDQ-NEXT: LBB16_44: ## %else42 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ 
-4918,16 +4816,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_46 ; AVX512VLDQ-NEXT: ## %bb.45: ## %cond.store43 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $6, %xmm2, 22(%rdi) +; AVX512VLDQ-NEXT: vpextrb $6, %xmm1, 22(%rdi) ; AVX512VLDQ-NEXT: LBB16_46: ## %else44 ; AVX512VLDQ-NEXT: kshiftrw $7, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_48 ; AVX512VLDQ-NEXT: ## %bb.47: ## %cond.store45 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $7, %xmm2, 23(%rdi) +; AVX512VLDQ-NEXT: vpextrb $7, %xmm1, 23(%rdi) ; AVX512VLDQ-NEXT: LBB16_48: ## %else46 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4939,16 +4835,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_50 ; AVX512VLDQ-NEXT: ## %bb.49: ## %cond.store47 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $8, %xmm2, 24(%rdi) +; AVX512VLDQ-NEXT: vpextrb $8, %xmm1, 24(%rdi) ; AVX512VLDQ-NEXT: LBB16_50: ## %else48 ; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_52 ; AVX512VLDQ-NEXT: ## %bb.51: ## %cond.store49 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $9, %xmm2, 25(%rdi) +; AVX512VLDQ-NEXT: vpextrb $9, %xmm1, 25(%rdi) ; AVX512VLDQ-NEXT: LBB16_52: ## %else50 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4960,16 +4854,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_54 ; AVX512VLDQ-NEXT: ## %bb.53: ## %cond.store51 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 
-; AVX512VLDQ-NEXT: vpextrb $10, %xmm2, 26(%rdi) +; AVX512VLDQ-NEXT: vpextrb $10, %xmm1, 26(%rdi) ; AVX512VLDQ-NEXT: LBB16_54: ## %else52 ; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_56 ; AVX512VLDQ-NEXT: ## %bb.55: ## %cond.store53 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $11, %xmm2, 27(%rdi) +; AVX512VLDQ-NEXT: vpextrb $11, %xmm1, 27(%rdi) ; AVX512VLDQ-NEXT: LBB16_56: ## %else54 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2 @@ -4981,16 +4873,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_58 ; AVX512VLDQ-NEXT: ## %bb.57: ## %cond.store55 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $12, %xmm2, 28(%rdi) +; AVX512VLDQ-NEXT: vpextrb $12, %xmm1, 28(%rdi) ; AVX512VLDQ-NEXT: LBB16_58: ## %else56 ; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_60 ; AVX512VLDQ-NEXT: ## %bb.59: ## %cond.store57 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2 -; AVX512VLDQ-NEXT: vpextrb $13, %xmm2, 29(%rdi) +; AVX512VLDQ-NEXT: vpextrb $13, %xmm1, 29(%rdi) ; AVX512VLDQ-NEXT: LBB16_60: ## %else58 ; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0 @@ -5002,16 +4892,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> % ; AVX512VLDQ-NEXT: testb $1, %al ; AVX512VLDQ-NEXT: je LBB16_62 ; AVX512VLDQ-NEXT: ## %bb.61: ## %cond.store59 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX512VLDQ-NEXT: vpextrb $14, %xmm0, 30(%rdi) +; AVX512VLDQ-NEXT: vpextrb $14, %xmm1, 30(%rdi) ; AVX512VLDQ-NEXT: LBB16_62: ## %else60 ; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0 ; AVX512VLDQ-NEXT: kmovw %k0, %eax ; AVX512VLDQ-NEXT: testb $1, %al ; 
AVX512VLDQ-NEXT: je LBB16_64 ; AVX512VLDQ-NEXT: ## %bb.63: ## %cond.store61 -; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0 -; AVX512VLDQ-NEXT: vpextrb $15, %xmm0, 31(%rdi) +; AVX512VLDQ-NEXT: vpextrb $15, %xmm1, 31(%rdi) ; AVX512VLDQ-NEXT: LBB16_64: ## %else62 ; AVX512VLDQ-NEXT: vzeroupper ; AVX512VLDQ-NEXT: retq |