summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/masked_store.ll
diff options
context:
space:
mode:
authorAnton Afanasyev <anton.a.afanasyev@gmail.com>2019-06-09 12:15:47 +0000
committerAnton Afanasyev <anton.a.afanasyev@gmail.com>2019-06-09 12:15:47 +0000
commit623d9ba068e6f11be992c5197a26f492573509ef (patch)
treeab388642cf50039003461fa43fc104bfd2d072d1 /llvm/test/CodeGen/X86/masked_store.ll
parentf18cf230e4e2a78fd575a5a1d260653b452f4d9d (diff)
downloadbcm5719-llvm-623d9ba068e6f11be992c5197a26f492573509ef.tar.gz
bcm5719-llvm-623d9ba068e6f11be992c5197a26f492573509ef.zip
[MIR] Add simple PRE pass to MachineCSE
This is the second part of the commit fixing PR38917 (hoisting partially redundant machine instructions). Most of PRE (partial redundancy elimination) and CSE work is done on LLVM IR, but some redundancy arises during DAG legalization. Machine CSE is not enough to deal with it. This simple PRE implementation works a little bit intricately: it runs before CSE, looking for partial redundancy and transforming it to full redundancy, anticipating that the next CSE step will eliminate this created redundancy. If CSE doesn't eliminate this, then the created instruction will remain dead and be eliminated later by the Remove Dead Machine Instructions pass. The third part of the commit is supposed to refactor MachineCSE, to make it clearer and to merge MachinePRE with MachineCSE, so one need not rely on the later Remove Dead pass to clear instrs not eliminated by CSE. First step: https://reviews.llvm.org/D54839 Fixes llvm.org/PR38917 This is a fixed recommit of r361356 after a PowerPC64 multistage build failure. llvm-svn: 362901
Diffstat (limited to 'llvm/test/CodeGen/X86/masked_store.ll')
-rw-r--r--llvm/test/CodeGen/X86/masked_store.ll722
1 files changed, 305 insertions, 417 deletions
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index 984b995877f..9df8363be4e 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -2151,69 +2151,62 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX1-NEXT: ## %bb.15: ## %cond.store13
; AVX1-NEXT: vpextrw $7, %xmm1, 14(%rdi)
; AVX1-NEXT: LBB14_16: ## %else14
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $0, %xmm2, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqw %xmm0, %xmm2, %xmm3
+; AVX1-NEXT: vpextrb $0, %xmm3, %eax
; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: je LBB14_18
; AVX1-NEXT: ## %bb.17: ## %cond.store15
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrw $0, %xmm3, 16(%rdi)
+; AVX1-NEXT: vpextrw $0, %xmm0, 16(%rdi)
; AVX1-NEXT: LBB14_18: ## %else16
-; AVX1-NEXT: vpextrb $2, %xmm2, %eax
+; AVX1-NEXT: vpextrb $2, %xmm3, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB14_20
; AVX1-NEXT: ## %bb.19: ## %cond.store17
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrw $1, %xmm2, 18(%rdi)
+; AVX1-NEXT: vpextrw $1, %xmm0, 18(%rdi)
; AVX1-NEXT: LBB14_20: ## %else18
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $4, %xmm2, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $4, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB14_22
; AVX1-NEXT: ## %bb.21: ## %cond.store19
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrw $2, %xmm3, 20(%rdi)
+; AVX1-NEXT: vpextrw $2, %xmm0, 20(%rdi)
; AVX1-NEXT: LBB14_22: ## %else20
-; AVX1-NEXT: vpextrb $6, %xmm2, %eax
+; AVX1-NEXT: vpextrb $6, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB14_24
; AVX1-NEXT: ## %bb.23: ## %cond.store21
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrw $3, %xmm2, 22(%rdi)
+; AVX1-NEXT: vpextrw $3, %xmm0, 22(%rdi)
; AVX1-NEXT: LBB14_24: ## %else22
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $8, %xmm2, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $8, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB14_26
; AVX1-NEXT: ## %bb.25: ## %cond.store23
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrw $4, %xmm3, 24(%rdi)
+; AVX1-NEXT: vpextrw $4, %xmm0, 24(%rdi)
; AVX1-NEXT: LBB14_26: ## %else24
-; AVX1-NEXT: vpextrb $10, %xmm2, %eax
+; AVX1-NEXT: vpextrb $10, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB14_28
; AVX1-NEXT: ## %bb.27: ## %cond.store25
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrw $5, %xmm2, 26(%rdi)
+; AVX1-NEXT: vpextrw $5, %xmm0, 26(%rdi)
; AVX1-NEXT: LBB14_28: ## %else26
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $12, %xmm0, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $12, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB14_30
; AVX1-NEXT: ## %bb.29: ## %cond.store27
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrw $6, %xmm2, 28(%rdi)
+; AVX1-NEXT: vpextrw $6, %xmm0, 28(%rdi)
; AVX1-NEXT: LBB14_30: ## %else28
-; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: vpextrb $14, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB14_32
; AVX1-NEXT: ## %bb.31: ## %cond.store29
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrw $7, %xmm0, 30(%rdi)
; AVX1-NEXT: LBB14_32: ## %else30
; AVX1-NEXT: vzeroupper
@@ -2282,17 +2275,16 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX2-NEXT: vpextrb $0, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: je LBB14_18
; AVX2-NEXT: ## %bb.17: ## %cond.store15
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrw $0, %xmm3, 16(%rdi)
+; AVX2-NEXT: vpextrw $0, %xmm1, 16(%rdi)
; AVX2-NEXT: LBB14_18: ## %else16
; AVX2-NEXT: vpextrb $2, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB14_20
; AVX2-NEXT: ## %bb.19: ## %cond.store17
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrw $1, %xmm2, 18(%rdi)
+; AVX2-NEXT: vpextrw $1, %xmm1, 18(%rdi)
; AVX2-NEXT: LBB14_20: ## %else18
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2301,15 +2293,13 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB14_22
; AVX2-NEXT: ## %bb.21: ## %cond.store19
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrw $2, %xmm3, 20(%rdi)
+; AVX2-NEXT: vpextrw $2, %xmm1, 20(%rdi)
; AVX2-NEXT: LBB14_22: ## %else20
; AVX2-NEXT: vpextrb $6, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB14_24
; AVX2-NEXT: ## %bb.23: ## %cond.store21
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrw $3, %xmm2, 22(%rdi)
+; AVX2-NEXT: vpextrw $3, %xmm1, 22(%rdi)
; AVX2-NEXT: LBB14_24: ## %else22
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2318,15 +2308,13 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB14_26
; AVX2-NEXT: ## %bb.25: ## %cond.store23
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrw $4, %xmm3, 24(%rdi)
+; AVX2-NEXT: vpextrw $4, %xmm1, 24(%rdi)
; AVX2-NEXT: LBB14_26: ## %else24
; AVX2-NEXT: vpextrb $10, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB14_28
; AVX2-NEXT: ## %bb.27: ## %cond.store25
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrw $5, %xmm2, 26(%rdi)
+; AVX2-NEXT: vpextrw $5, %xmm1, 26(%rdi)
; AVX2-NEXT: LBB14_28: ## %else26
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
@@ -2335,15 +2323,13 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB14_30
; AVX2-NEXT: ## %bb.29: ## %cond.store27
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrw $6, %xmm2, 28(%rdi)
+; AVX2-NEXT: vpextrw $6, %xmm1, 28(%rdi)
; AVX2-NEXT: LBB14_30: ## %else28
; AVX2-NEXT: vpextrb $14, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB14_32
; AVX2-NEXT: ## %bb.31: ## %cond.store29
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX2-NEXT: vpextrw $7, %xmm0, 30(%rdi)
+; AVX2-NEXT: vpextrw $7, %xmm1, 30(%rdi)
; AVX2-NEXT: LBB14_32: ## %else30
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -2428,18 +2414,17 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX512F-NEXT: kshiftrw $8, %k0, %k1
; AVX512F-NEXT: kmovw %k1, %eax
; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512F-NEXT: je LBB14_18
; AVX512F-NEXT: ## %bb.17: ## %cond.store15
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrw $0, %xmm2, 16(%rdi)
+; AVX512F-NEXT: vpextrw $0, %xmm1, 16(%rdi)
; AVX512F-NEXT: LBB14_18: ## %else16
; AVX512F-NEXT: kshiftrw $9, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB14_20
; AVX512F-NEXT: ## %bb.19: ## %cond.store17
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrw $1, %xmm2, 18(%rdi)
+; AVX512F-NEXT: vpextrw $1, %xmm1, 18(%rdi)
; AVX512F-NEXT: LBB14_20: ## %else18
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2450,16 +2435,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB14_22
; AVX512F-NEXT: ## %bb.21: ## %cond.store19
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrw $2, %xmm2, 20(%rdi)
+; AVX512F-NEXT: vpextrw $2, %xmm1, 20(%rdi)
; AVX512F-NEXT: LBB14_22: ## %else20
; AVX512F-NEXT: kshiftrw $11, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB14_24
; AVX512F-NEXT: ## %bb.23: ## %cond.store21
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrw $3, %xmm2, 22(%rdi)
+; AVX512F-NEXT: vpextrw $3, %xmm1, 22(%rdi)
; AVX512F-NEXT: LBB14_24: ## %else22
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2470,16 +2453,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB14_26
; AVX512F-NEXT: ## %bb.25: ## %cond.store23
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrw $4, %xmm2, 24(%rdi)
+; AVX512F-NEXT: vpextrw $4, %xmm1, 24(%rdi)
; AVX512F-NEXT: LBB14_26: ## %else24
; AVX512F-NEXT: kshiftrw $13, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB14_28
; AVX512F-NEXT: ## %bb.27: ## %cond.store25
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrw $5, %xmm2, 26(%rdi)
+; AVX512F-NEXT: vpextrw $5, %xmm1, 26(%rdi)
; AVX512F-NEXT: LBB14_28: ## %else26
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
@@ -2490,16 +2471,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB14_30
; AVX512F-NEXT: ## %bb.29: ## %cond.store27
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512F-NEXT: vpextrw $6, %xmm0, 28(%rdi)
+; AVX512F-NEXT: vpextrw $6, %xmm1, 28(%rdi)
; AVX512F-NEXT: LBB14_30: ## %else28
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB14_32
; AVX512F-NEXT: ## %bb.31: ## %cond.store29
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512F-NEXT: vpextrw $7, %xmm0, 30(%rdi)
+; AVX512F-NEXT: vpextrw $7, %xmm1, 30(%rdi)
; AVX512F-NEXT: LBB14_32: ## %else30
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -2584,18 +2563,17 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX512VLDQ-NEXT: kshiftrw $8, %k0, %k1
; AVX512VLDQ-NEXT: kmovw %k1, %eax
; AVX512VLDQ-NEXT: testb $1, %al
+; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512VLDQ-NEXT: je LBB14_18
; AVX512VLDQ-NEXT: ## %bb.17: ## %cond.store15
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrw $0, %xmm2, 16(%rdi)
+; AVX512VLDQ-NEXT: vpextrw $0, %xmm1, 16(%rdi)
; AVX512VLDQ-NEXT: LBB14_18: ## %else16
; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB14_20
; AVX512VLDQ-NEXT: ## %bb.19: ## %cond.store17
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrw $1, %xmm2, 18(%rdi)
+; AVX512VLDQ-NEXT: vpextrw $1, %xmm1, 18(%rdi)
; AVX512VLDQ-NEXT: LBB14_20: ## %else18
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2606,16 +2584,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB14_22
; AVX512VLDQ-NEXT: ## %bb.21: ## %cond.store19
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrw $2, %xmm2, 20(%rdi)
+; AVX512VLDQ-NEXT: vpextrw $2, %xmm1, 20(%rdi)
; AVX512VLDQ-NEXT: LBB14_22: ## %else20
; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB14_24
; AVX512VLDQ-NEXT: ## %bb.23: ## %cond.store21
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrw $3, %xmm2, 22(%rdi)
+; AVX512VLDQ-NEXT: vpextrw $3, %xmm1, 22(%rdi)
; AVX512VLDQ-NEXT: LBB14_24: ## %else22
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2
@@ -2626,16 +2602,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB14_26
; AVX512VLDQ-NEXT: ## %bb.25: ## %cond.store23
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrw $4, %xmm2, 24(%rdi)
+; AVX512VLDQ-NEXT: vpextrw $4, %xmm1, 24(%rdi)
; AVX512VLDQ-NEXT: LBB14_26: ## %else24
; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB14_28
; AVX512VLDQ-NEXT: ## %bb.27: ## %cond.store25
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrw $5, %xmm2, 26(%rdi)
+; AVX512VLDQ-NEXT: vpextrw $5, %xmm1, 26(%rdi)
; AVX512VLDQ-NEXT: LBB14_28: ## %else26
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
@@ -2646,16 +2620,14 @@ define void @store_v16i16_v16i16(<16 x i16> %trigger, <16 x i16>* %addr, <16 x i
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB14_30
; AVX512VLDQ-NEXT: ## %bb.29: ## %cond.store27
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512VLDQ-NEXT: vpextrw $6, %xmm0, 28(%rdi)
+; AVX512VLDQ-NEXT: vpextrw $6, %xmm1, 28(%rdi)
; AVX512VLDQ-NEXT: LBB14_30: ## %else28
; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB14_32
; AVX512VLDQ-NEXT: ## %bb.31: ## %cond.store29
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512VLDQ-NEXT: vpextrw $7, %xmm0, 30(%rdi)
+; AVX512VLDQ-NEXT: vpextrw $7, %xmm1, 30(%rdi)
; AVX512VLDQ-NEXT: LBB14_32: ## %else30
; AVX512VLDQ-NEXT: vzeroupper
; AVX512VLDQ-NEXT: retq
@@ -2680,134 +2652,126 @@ define void @store_v16i8_v16i8(<16 x i8> %trigger, <16 x i8>* %addr, <16 x i8> %
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: movd %xmm2, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: movd %xmm2, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: je LBB15_2
; SSE2-NEXT: ## %bb.1: ## %cond.store
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: movb %cl, (%rdi)
+; SSE2-NEXT: movb %al, (%rdi)
; SSE2-NEXT: LBB15_2: ## %else
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB15_4
; SSE2-NEXT: ## %bb.3: ## %cond.store1
-; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: movb %ah, 1(%rdi)
; SSE2-NEXT: LBB15_4: ## %else2
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: movd %xmm2, %eax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $16, %ecx
-; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: movd %xmm2, %ecx
+; SSE2-NEXT: movl %ecx, %edx
+; SSE2-NEXT: shrl $16, %edx
+; SSE2-NEXT: testb $1, %dl
; SSE2-NEXT: je LBB15_6
; SSE2-NEXT: ## %bb.5: ## %cond.store3
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: shrl $16, %ecx
-; SSE2-NEXT: movb %cl, 2(%rdi)
+; SSE2-NEXT: movl %eax, %edx
+; SSE2-NEXT: shrl $16, %edx
+; SSE2-NEXT: movb %dl, 2(%rdi)
; SSE2-NEXT: LBB15_6: ## %else4
-; SSE2-NEXT: shrl $24, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $24, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB15_8
; SSE2-NEXT: ## %bb.7: ## %cond.store5
-; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: shrl $24, %eax
; SSE2-NEXT: movb %al, 3(%rdi)
; SSE2-NEXT: LBB15_8: ## %else6
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: pextrw $2, %xmm2, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $2, %xmm2, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $2, %xmm1, %eax
; SSE2-NEXT: je LBB15_10
; SSE2-NEXT: ## %bb.9: ## %cond.store7
-; SSE2-NEXT: pextrw $2, %xmm1, %ecx
-; SSE2-NEXT: movb %cl, 4(%rdi)
+; SSE2-NEXT: movb %al, 4(%rdi)
; SSE2-NEXT: LBB15_10: ## %else8
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB15_12
; SSE2-NEXT: ## %bb.11: ## %cond.store9
-; SSE2-NEXT: pextrw $2, %xmm1, %eax
; SSE2-NEXT: movb %ah, 5(%rdi)
; SSE2-NEXT: LBB15_12: ## %else10
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: pextrw $3, %xmm2, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $3, %xmm2, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $3, %xmm1, %eax
; SSE2-NEXT: je LBB15_14
; SSE2-NEXT: ## %bb.13: ## %cond.store11
-; SSE2-NEXT: pextrw $3, %xmm1, %ecx
-; SSE2-NEXT: movb %cl, 6(%rdi)
+; SSE2-NEXT: movb %al, 6(%rdi)
; SSE2-NEXT: LBB15_14: ## %else12
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB15_16
; SSE2-NEXT: ## %bb.15: ## %cond.store13
-; SSE2-NEXT: pextrw $3, %xmm1, %eax
; SSE2-NEXT: movb %ah, 7(%rdi)
; SSE2-NEXT: LBB15_16: ## %else14
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: pextrw $4, %xmm2, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $4, %xmm2, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $4, %xmm1, %eax
; SSE2-NEXT: je LBB15_18
; SSE2-NEXT: ## %bb.17: ## %cond.store15
-; SSE2-NEXT: pextrw $4, %xmm1, %ecx
-; SSE2-NEXT: movb %cl, 8(%rdi)
+; SSE2-NEXT: movb %al, 8(%rdi)
; SSE2-NEXT: LBB15_18: ## %else16
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB15_20
; SSE2-NEXT: ## %bb.19: ## %cond.store17
-; SSE2-NEXT: pextrw $4, %xmm1, %eax
; SSE2-NEXT: movb %ah, 9(%rdi)
; SSE2-NEXT: LBB15_20: ## %else18
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: pextrw $5, %xmm2, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $5, %xmm2, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $5, %xmm1, %eax
; SSE2-NEXT: je LBB15_22
; SSE2-NEXT: ## %bb.21: ## %cond.store19
-; SSE2-NEXT: pextrw $5, %xmm1, %ecx
-; SSE2-NEXT: movb %cl, 10(%rdi)
+; SSE2-NEXT: movb %al, 10(%rdi)
; SSE2-NEXT: LBB15_22: ## %else20
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB15_24
; SSE2-NEXT: ## %bb.23: ## %cond.store21
-; SSE2-NEXT: pextrw $5, %xmm1, %eax
; SSE2-NEXT: movb %ah, 11(%rdi)
; SSE2-NEXT: LBB15_24: ## %else22
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb %xmm0, %xmm2
-; SSE2-NEXT: pextrw $6, %xmm2, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $6, %xmm2, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $6, %xmm1, %eax
; SSE2-NEXT: je LBB15_26
; SSE2-NEXT: ## %bb.25: ## %cond.store23
-; SSE2-NEXT: pextrw $6, %xmm1, %ecx
-; SSE2-NEXT: movb %cl, 12(%rdi)
+; SSE2-NEXT: movb %al, 12(%rdi)
; SSE2-NEXT: LBB15_26: ## %else24
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB15_28
; SSE2-NEXT: ## %bb.27: ## %cond.store25
-; SSE2-NEXT: pextrw $6, %xmm1, %eax
; SSE2-NEXT: movb %ah, 13(%rdi)
; SSE2-NEXT: LBB15_28: ## %else26
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pcmpeqb %xmm2, %xmm0
-; SSE2-NEXT: pextrw $7, %xmm0, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $7, %xmm0, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $7, %xmm1, %eax
; SSE2-NEXT: je LBB15_30
; SSE2-NEXT: ## %bb.29: ## %cond.store27
-; SSE2-NEXT: pextrw $7, %xmm1, %ecx
-; SSE2-NEXT: movb %cl, 14(%rdi)
+; SSE2-NEXT: movb %al, 14(%rdi)
; SSE2-NEXT: LBB15_30: ## %else28
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB15_32
; SSE2-NEXT: ## %bb.31: ## %cond.store29
-; SSE2-NEXT: pextrw $7, %xmm1, %eax
; SSE2-NEXT: movb %ah, 15(%rdi)
; SSE2-NEXT: LBB15_32: ## %else30
; SSE2-NEXT: retq
@@ -3355,266 +3319,250 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; SSE2: ## %bb.0:
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT: movd %xmm4, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: movd %xmm4, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: je LBB16_2
; SSE2-NEXT: ## %bb.1: ## %cond.store
-; SSE2-NEXT: movd %xmm2, %ecx
-; SSE2-NEXT: movb %cl, (%rdi)
+; SSE2-NEXT: movb %al, (%rdi)
; SSE2-NEXT: LBB16_2: ## %else
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_4
; SSE2-NEXT: ## %bb.3: ## %cond.store1
-; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: movb %ah, 1(%rdi)
; SSE2-NEXT: LBB16_4: ## %else2
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT: movd %xmm4, %eax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $16, %ecx
-; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: movd %xmm4, %ecx
+; SSE2-NEXT: movl %ecx, %edx
+; SSE2-NEXT: shrl $16, %edx
+; SSE2-NEXT: testb $1, %dl
; SSE2-NEXT: je LBB16_6
; SSE2-NEXT: ## %bb.5: ## %cond.store3
-; SSE2-NEXT: movd %xmm2, %ecx
-; SSE2-NEXT: shrl $16, %ecx
-; SSE2-NEXT: movb %cl, 2(%rdi)
+; SSE2-NEXT: movl %eax, %edx
+; SSE2-NEXT: shrl $16, %edx
+; SSE2-NEXT: movb %dl, 2(%rdi)
; SSE2-NEXT: LBB16_6: ## %else4
-; SSE2-NEXT: shrl $24, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $24, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_8
; SSE2-NEXT: ## %bb.7: ## %cond.store5
-; SSE2-NEXT: movd %xmm2, %eax
; SSE2-NEXT: shrl $24, %eax
; SSE2-NEXT: movb %al, 3(%rdi)
; SSE2-NEXT: LBB16_8: ## %else6
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT: pextrw $2, %xmm4, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $2, %xmm4, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $2, %xmm2, %eax
; SSE2-NEXT: je LBB16_10
; SSE2-NEXT: ## %bb.9: ## %cond.store7
-; SSE2-NEXT: pextrw $2, %xmm2, %ecx
-; SSE2-NEXT: movb %cl, 4(%rdi)
+; SSE2-NEXT: movb %al, 4(%rdi)
; SSE2-NEXT: LBB16_10: ## %else8
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_12
; SSE2-NEXT: ## %bb.11: ## %cond.store9
-; SSE2-NEXT: pextrw $2, %xmm2, %eax
; SSE2-NEXT: movb %ah, 5(%rdi)
; SSE2-NEXT: LBB16_12: ## %else10
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT: pextrw $3, %xmm4, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $3, %xmm4, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $3, %xmm2, %eax
; SSE2-NEXT: je LBB16_14
; SSE2-NEXT: ## %bb.13: ## %cond.store11
-; SSE2-NEXT: pextrw $3, %xmm2, %ecx
-; SSE2-NEXT: movb %cl, 6(%rdi)
+; SSE2-NEXT: movb %al, 6(%rdi)
; SSE2-NEXT: LBB16_14: ## %else12
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_16
; SSE2-NEXT: ## %bb.15: ## %cond.store13
-; SSE2-NEXT: pextrw $3, %xmm2, %eax
; SSE2-NEXT: movb %ah, 7(%rdi)
; SSE2-NEXT: LBB16_16: ## %else14
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT: pextrw $4, %xmm4, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $4, %xmm4, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $4, %xmm2, %eax
; SSE2-NEXT: je LBB16_18
; SSE2-NEXT: ## %bb.17: ## %cond.store15
-; SSE2-NEXT: pextrw $4, %xmm2, %ecx
-; SSE2-NEXT: movb %cl, 8(%rdi)
+; SSE2-NEXT: movb %al, 8(%rdi)
; SSE2-NEXT: LBB16_18: ## %else16
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_20
; SSE2-NEXT: ## %bb.19: ## %cond.store17
-; SSE2-NEXT: pextrw $4, %xmm2, %eax
; SSE2-NEXT: movb %ah, 9(%rdi)
; SSE2-NEXT: LBB16_20: ## %else18
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT: pextrw $5, %xmm4, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $5, %xmm4, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $5, %xmm2, %eax
; SSE2-NEXT: je LBB16_22
; SSE2-NEXT: ## %bb.21: ## %cond.store19
-; SSE2-NEXT: pextrw $5, %xmm2, %ecx
-; SSE2-NEXT: movb %cl, 10(%rdi)
+; SSE2-NEXT: movb %al, 10(%rdi)
; SSE2-NEXT: LBB16_22: ## %else20
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_24
; SSE2-NEXT: ## %bb.23: ## %cond.store21
-; SSE2-NEXT: pextrw $5, %xmm2, %eax
; SSE2-NEXT: movb %ah, 11(%rdi)
; SSE2-NEXT: LBB16_24: ## %else22
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqb %xmm0, %xmm4
-; SSE2-NEXT: pextrw $6, %xmm4, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $6, %xmm4, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $6, %xmm2, %eax
; SSE2-NEXT: je LBB16_26
; SSE2-NEXT: ## %bb.25: ## %cond.store23
-; SSE2-NEXT: pextrw $6, %xmm2, %ecx
-; SSE2-NEXT: movb %cl, 12(%rdi)
+; SSE2-NEXT: movb %al, 12(%rdi)
; SSE2-NEXT: LBB16_26: ## %else24
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_28
; SSE2-NEXT: ## %bb.27: ## %cond.store25
-; SSE2-NEXT: pextrw $6, %xmm2, %eax
; SSE2-NEXT: movb %ah, 13(%rdi)
; SSE2-NEXT: LBB16_28: ## %else26
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pcmpeqb %xmm4, %xmm0
-; SSE2-NEXT: pextrw $7, %xmm0, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $7, %xmm0, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $7, %xmm2, %eax
; SSE2-NEXT: je LBB16_30
; SSE2-NEXT: ## %bb.29: ## %cond.store27
-; SSE2-NEXT: pextrw $7, %xmm2, %ecx
-; SSE2-NEXT: movb %cl, 14(%rdi)
+; SSE2-NEXT: movb %al, 14(%rdi)
; SSE2-NEXT: LBB16_30: ## %else28
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_32
; SSE2-NEXT: ## %bb.31: ## %cond.store29
-; SSE2-NEXT: pextrw $7, %xmm2, %eax
; SSE2-NEXT: movb %ah, 15(%rdi)
; SSE2-NEXT: LBB16_32: ## %else30
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: movd %xmm0, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: movd %xmm3, %eax
; SSE2-NEXT: je LBB16_34
; SSE2-NEXT: ## %bb.33: ## %cond.store31
-; SSE2-NEXT: movd %xmm3, %ecx
-; SSE2-NEXT: movb %cl, 16(%rdi)
+; SSE2-NEXT: movb %al, 16(%rdi)
; SSE2-NEXT: LBB16_34: ## %else32
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_36
; SSE2-NEXT: ## %bb.35: ## %cond.store33
-; SSE2-NEXT: movd %xmm3, %eax
; SSE2-NEXT: movb %ah, 17(%rdi)
; SSE2-NEXT: LBB16_36: ## %else34
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $16, %ecx
-; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: movd %xmm0, %ecx
+; SSE2-NEXT: movl %ecx, %edx
+; SSE2-NEXT: shrl $16, %edx
+; SSE2-NEXT: testb $1, %dl
; SSE2-NEXT: je LBB16_38
; SSE2-NEXT: ## %bb.37: ## %cond.store35
-; SSE2-NEXT: movd %xmm3, %ecx
-; SSE2-NEXT: shrl $16, %ecx
-; SSE2-NEXT: movb %cl, 18(%rdi)
+; SSE2-NEXT: movl %eax, %edx
+; SSE2-NEXT: shrl $16, %edx
+; SSE2-NEXT: movb %dl, 18(%rdi)
; SSE2-NEXT: LBB16_38: ## %else36
-; SSE2-NEXT: shrl $24, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $24, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_40
; SSE2-NEXT: ## %bb.39: ## %cond.store37
-; SSE2-NEXT: movd %xmm3, %eax
; SSE2-NEXT: shrl $24, %eax
; SSE2-NEXT: movb %al, 19(%rdi)
; SSE2-NEXT: LBB16_40: ## %else38
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: pextrw $2, %xmm0, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $2, %xmm0, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $2, %xmm3, %eax
; SSE2-NEXT: je LBB16_42
; SSE2-NEXT: ## %bb.41: ## %cond.store39
-; SSE2-NEXT: pextrw $2, %xmm3, %ecx
-; SSE2-NEXT: movb %cl, 20(%rdi)
+; SSE2-NEXT: movb %al, 20(%rdi)
; SSE2-NEXT: LBB16_42: ## %else40
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_44
; SSE2-NEXT: ## %bb.43: ## %cond.store41
-; SSE2-NEXT: pextrw $2, %xmm3, %eax
; SSE2-NEXT: movb %ah, 21(%rdi)
; SSE2-NEXT: LBB16_44: ## %else42
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: pextrw $3, %xmm0, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $3, %xmm0, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $3, %xmm3, %eax
; SSE2-NEXT: je LBB16_46
; SSE2-NEXT: ## %bb.45: ## %cond.store43
-; SSE2-NEXT: pextrw $3, %xmm3, %ecx
-; SSE2-NEXT: movb %cl, 22(%rdi)
+; SSE2-NEXT: movb %al, 22(%rdi)
; SSE2-NEXT: LBB16_46: ## %else44
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_48
; SSE2-NEXT: ## %bb.47: ## %cond.store45
-; SSE2-NEXT: pextrw $3, %xmm3, %eax
; SSE2-NEXT: movb %ah, 23(%rdi)
; SSE2-NEXT: LBB16_48: ## %else46
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: pextrw $4, %xmm0, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $4, %xmm0, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $4, %xmm3, %eax
; SSE2-NEXT: je LBB16_50
; SSE2-NEXT: ## %bb.49: ## %cond.store47
-; SSE2-NEXT: pextrw $4, %xmm3, %ecx
-; SSE2-NEXT: movb %cl, 24(%rdi)
+; SSE2-NEXT: movb %al, 24(%rdi)
; SSE2-NEXT: LBB16_50: ## %else48
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_52
; SSE2-NEXT: ## %bb.51: ## %cond.store49
-; SSE2-NEXT: pextrw $4, %xmm3, %eax
; SSE2-NEXT: movb %ah, 25(%rdi)
; SSE2-NEXT: LBB16_52: ## %else50
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: pextrw $5, %xmm0, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $5, %xmm0, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $5, %xmm3, %eax
; SSE2-NEXT: je LBB16_54
; SSE2-NEXT: ## %bb.53: ## %cond.store51
-; SSE2-NEXT: pextrw $5, %xmm3, %ecx
-; SSE2-NEXT: movb %cl, 26(%rdi)
+; SSE2-NEXT: movb %al, 26(%rdi)
; SSE2-NEXT: LBB16_54: ## %else52
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_56
; SSE2-NEXT: ## %bb.55: ## %cond.store53
-; SSE2-NEXT: pextrw $5, %xmm3, %eax
; SSE2-NEXT: movb %ah, 27(%rdi)
; SSE2-NEXT: LBB16_56: ## %else54
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE2-NEXT: pextrw $6, %xmm0, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $6, %xmm0, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $6, %xmm3, %eax
; SSE2-NEXT: je LBB16_58
; SSE2-NEXT: ## %bb.57: ## %cond.store55
-; SSE2-NEXT: pextrw $6, %xmm3, %ecx
-; SSE2-NEXT: movb %cl, 28(%rdi)
+; SSE2-NEXT: movb %al, 28(%rdi)
; SSE2-NEXT: LBB16_58: ## %else56
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_60
; SSE2-NEXT: ## %bb.59: ## %cond.store57
-; SSE2-NEXT: pextrw $6, %xmm3, %eax
; SSE2-NEXT: movb %ah, 29(%rdi)
; SSE2-NEXT: LBB16_60: ## %else58
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
-; SSE2-NEXT: pextrw $7, %xmm1, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: pextrw $7, %xmm1, %ecx
+; SSE2-NEXT: testb $1, %cl
+; SSE2-NEXT: pextrw $7, %xmm3, %eax
; SSE2-NEXT: je LBB16_62
; SSE2-NEXT: ## %bb.61: ## %cond.store59
-; SSE2-NEXT: pextrw $7, %xmm3, %ecx
-; SSE2-NEXT: movb %cl, 30(%rdi)
+; SSE2-NEXT: movb %al, 30(%rdi)
; SSE2-NEXT: LBB16_62: ## %else60
-; SSE2-NEXT: shrl $8, %eax
-; SSE2-NEXT: testb $1, %al
+; SSE2-NEXT: shrl $8, %ecx
+; SSE2-NEXT: testb $1, %cl
; SSE2-NEXT: je LBB16_64
; SSE2-NEXT: ## %bb.63: ## %cond.store61
-; SSE2-NEXT: pextrw $7, %xmm3, %eax
; SSE2-NEXT: movb %ah, 31(%rdi)
; SSE2-NEXT: LBB16_64: ## %else62
; SSE2-NEXT: retq
@@ -3969,133 +3917,118 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX1-NEXT: ## %bb.31: ## %cond.store29
; AVX1-NEXT: vpextrb $15, %xmm1, 15(%rdi)
; AVX1-NEXT: LBB16_32: ## %else30
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $0, %xmm2, %eax
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpeqb %xmm0, %xmm2, %xmm3
+; AVX1-NEXT: vpextrb $0, %xmm3, %eax
; AVX1-NEXT: testb $1, %al
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: je LBB16_34
; AVX1-NEXT: ## %bb.33: ## %cond.store31
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrb $0, %xmm3, 16(%rdi)
+; AVX1-NEXT: vpextrb $0, %xmm0, 16(%rdi)
; AVX1-NEXT: LBB16_34: ## %else32
-; AVX1-NEXT: vpextrb $1, %xmm2, %eax
+; AVX1-NEXT: vpextrb $1, %xmm3, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_36
; AVX1-NEXT: ## %bb.35: ## %cond.store33
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrb $1, %xmm2, 17(%rdi)
+; AVX1-NEXT: vpextrb $1, %xmm0, 17(%rdi)
; AVX1-NEXT: LBB16_36: ## %else34
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $2, %xmm2, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $2, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_38
; AVX1-NEXT: ## %bb.37: ## %cond.store35
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrb $2, %xmm3, 18(%rdi)
+; AVX1-NEXT: vpextrb $2, %xmm0, 18(%rdi)
; AVX1-NEXT: LBB16_38: ## %else36
-; AVX1-NEXT: vpextrb $3, %xmm2, %eax
+; AVX1-NEXT: vpextrb $3, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_40
; AVX1-NEXT: ## %bb.39: ## %cond.store37
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrb $3, %xmm2, 19(%rdi)
+; AVX1-NEXT: vpextrb $3, %xmm0, 19(%rdi)
; AVX1-NEXT: LBB16_40: ## %else38
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $4, %xmm2, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $4, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_42
; AVX1-NEXT: ## %bb.41: ## %cond.store39
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrb $4, %xmm3, 20(%rdi)
+; AVX1-NEXT: vpextrb $4, %xmm0, 20(%rdi)
; AVX1-NEXT: LBB16_42: ## %else40
-; AVX1-NEXT: vpextrb $5, %xmm2, %eax
+; AVX1-NEXT: vpextrb $5, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_44
; AVX1-NEXT: ## %bb.43: ## %cond.store41
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrb $5, %xmm2, 21(%rdi)
+; AVX1-NEXT: vpextrb $5, %xmm0, 21(%rdi)
; AVX1-NEXT: LBB16_44: ## %else42
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $6, %xmm2, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $6, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_46
; AVX1-NEXT: ## %bb.45: ## %cond.store43
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrb $6, %xmm3, 22(%rdi)
+; AVX1-NEXT: vpextrb $6, %xmm0, 22(%rdi)
; AVX1-NEXT: LBB16_46: ## %else44
-; AVX1-NEXT: vpextrb $7, %xmm2, %eax
+; AVX1-NEXT: vpextrb $7, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_48
; AVX1-NEXT: ## %bb.47: ## %cond.store45
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrb $7, %xmm2, 23(%rdi)
+; AVX1-NEXT: vpextrb $7, %xmm0, 23(%rdi)
; AVX1-NEXT: LBB16_48: ## %else46
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $8, %xmm2, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $8, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_50
; AVX1-NEXT: ## %bb.49: ## %cond.store47
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrb $8, %xmm3, 24(%rdi)
+; AVX1-NEXT: vpextrb $8, %xmm0, 24(%rdi)
; AVX1-NEXT: LBB16_50: ## %else48
-; AVX1-NEXT: vpextrb $9, %xmm2, %eax
+; AVX1-NEXT: vpextrb $9, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_52
; AVX1-NEXT: ## %bb.51: ## %cond.store49
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrb $9, %xmm2, 25(%rdi)
+; AVX1-NEXT: vpextrb $9, %xmm0, 25(%rdi)
; AVX1-NEXT: LBB16_52: ## %else50
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $10, %xmm2, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $10, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_54
; AVX1-NEXT: ## %bb.53: ## %cond.store51
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrb $10, %xmm3, 26(%rdi)
+; AVX1-NEXT: vpextrb $10, %xmm0, 26(%rdi)
; AVX1-NEXT: LBB16_54: ## %else52
-; AVX1-NEXT: vpextrb $11, %xmm2, %eax
+; AVX1-NEXT: vpextrb $11, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_56
; AVX1-NEXT: ## %bb.55: ## %cond.store53
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrb $11, %xmm2, 27(%rdi)
+; AVX1-NEXT: vpextrb $11, %xmm0, 27(%rdi)
; AVX1-NEXT: LBB16_56: ## %else54
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpextrb $12, %xmm2, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $12, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_58
; AVX1-NEXT: ## %bb.57: ## %cond.store55
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpextrb $12, %xmm3, 28(%rdi)
+; AVX1-NEXT: vpextrb $12, %xmm0, 28(%rdi)
; AVX1-NEXT: LBB16_58: ## %else56
-; AVX1-NEXT: vpextrb $13, %xmm2, %eax
+; AVX1-NEXT: vpextrb $13, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_60
; AVX1-NEXT: ## %bb.59: ## %cond.store57
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrb $13, %xmm2, 29(%rdi)
+; AVX1-NEXT: vpextrb $13, %xmm0, 29(%rdi)
; AVX1-NEXT: LBB16_60: ## %else58
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpextrb $14, %xmm0, %eax
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vpextrb $14, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_62
; AVX1-NEXT: ## %bb.61: ## %cond.store59
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpextrb $14, %xmm2, 30(%rdi)
+; AVX1-NEXT: vpextrb $14, %xmm0, 30(%rdi)
; AVX1-NEXT: LBB16_62: ## %else60
-; AVX1-NEXT: vpextrb $15, %xmm0, %eax
+; AVX1-NEXT: vpextrb $15, %xmm1, %eax
; AVX1-NEXT: testb $1, %al
; AVX1-NEXT: je LBB16_64
; AVX1-NEXT: ## %bb.63: ## %cond.store61
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT: vpextrb $15, %xmm0, 31(%rdi)
; AVX1-NEXT: LBB16_64: ## %else62
; AVX1-NEXT: vzeroupper
@@ -4220,17 +4153,16 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX2-NEXT: vpextrb $0, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: je LBB16_34
; AVX2-NEXT: ## %bb.33: ## %cond.store31
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrb $0, %xmm3, 16(%rdi)
+; AVX2-NEXT: vpextrb $0, %xmm1, 16(%rdi)
; AVX2-NEXT: LBB16_34: ## %else32
; AVX2-NEXT: vpextrb $1, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_36
; AVX2-NEXT: ## %bb.35: ## %cond.store33
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrb $1, %xmm2, 17(%rdi)
+; AVX2-NEXT: vpextrb $1, %xmm1, 17(%rdi)
; AVX2-NEXT: LBB16_36: ## %else34
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4239,15 +4171,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_38
; AVX2-NEXT: ## %bb.37: ## %cond.store35
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrb $2, %xmm3, 18(%rdi)
+; AVX2-NEXT: vpextrb $2, %xmm1, 18(%rdi)
; AVX2-NEXT: LBB16_38: ## %else36
; AVX2-NEXT: vpextrb $3, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_40
; AVX2-NEXT: ## %bb.39: ## %cond.store37
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrb $3, %xmm2, 19(%rdi)
+; AVX2-NEXT: vpextrb $3, %xmm1, 19(%rdi)
; AVX2-NEXT: LBB16_40: ## %else38
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4256,15 +4186,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_42
; AVX2-NEXT: ## %bb.41: ## %cond.store39
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrb $4, %xmm3, 20(%rdi)
+; AVX2-NEXT: vpextrb $4, %xmm1, 20(%rdi)
; AVX2-NEXT: LBB16_42: ## %else40
; AVX2-NEXT: vpextrb $5, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_44
; AVX2-NEXT: ## %bb.43: ## %cond.store41
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrb $5, %xmm2, 21(%rdi)
+; AVX2-NEXT: vpextrb $5, %xmm1, 21(%rdi)
; AVX2-NEXT: LBB16_44: ## %else42
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4273,15 +4201,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_46
; AVX2-NEXT: ## %bb.45: ## %cond.store43
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrb $6, %xmm3, 22(%rdi)
+; AVX2-NEXT: vpextrb $6, %xmm1, 22(%rdi)
; AVX2-NEXT: LBB16_46: ## %else44
; AVX2-NEXT: vpextrb $7, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_48
; AVX2-NEXT: ## %bb.47: ## %cond.store45
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrb $7, %xmm2, 23(%rdi)
+; AVX2-NEXT: vpextrb $7, %xmm1, 23(%rdi)
; AVX2-NEXT: LBB16_48: ## %else46
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4290,15 +4216,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_50
; AVX2-NEXT: ## %bb.49: ## %cond.store47
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrb $8, %xmm3, 24(%rdi)
+; AVX2-NEXT: vpextrb $8, %xmm1, 24(%rdi)
; AVX2-NEXT: LBB16_50: ## %else48
; AVX2-NEXT: vpextrb $9, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_52
; AVX2-NEXT: ## %bb.51: ## %cond.store49
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrb $9, %xmm2, 25(%rdi)
+; AVX2-NEXT: vpextrb $9, %xmm1, 25(%rdi)
; AVX2-NEXT: LBB16_52: ## %else50
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4307,15 +4231,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_54
; AVX2-NEXT: ## %bb.53: ## %cond.store51
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrb $10, %xmm3, 26(%rdi)
+; AVX2-NEXT: vpextrb $10, %xmm1, 26(%rdi)
; AVX2-NEXT: LBB16_54: ## %else52
; AVX2-NEXT: vpextrb $11, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_56
; AVX2-NEXT: ## %bb.55: ## %cond.store53
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrb $11, %xmm2, 27(%rdi)
+; AVX2-NEXT: vpextrb $11, %xmm1, 27(%rdi)
; AVX2-NEXT: LBB16_56: ## %else54
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4324,15 +4246,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_58
; AVX2-NEXT: ## %bb.57: ## %cond.store55
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT: vpextrb $12, %xmm3, 28(%rdi)
+; AVX2-NEXT: vpextrb $12, %xmm1, 28(%rdi)
; AVX2-NEXT: LBB16_58: ## %else56
; AVX2-NEXT: vpextrb $13, %xmm2, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_60
; AVX2-NEXT: ## %bb.59: ## %cond.store57
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrb $13, %xmm2, 29(%rdi)
+; AVX2-NEXT: vpextrb $13, %xmm1, 29(%rdi)
; AVX2-NEXT: LBB16_60: ## %else58
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
@@ -4341,15 +4261,13 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_62
; AVX2-NEXT: ## %bb.61: ## %cond.store59
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpextrb $14, %xmm2, 30(%rdi)
+; AVX2-NEXT: vpextrb $14, %xmm1, 30(%rdi)
; AVX2-NEXT: LBB16_62: ## %else60
; AVX2-NEXT: vpextrb $15, %xmm0, %eax
; AVX2-NEXT: testb $1, %al
; AVX2-NEXT: je LBB16_64
; AVX2-NEXT: ## %bb.63: ## %cond.store61
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX2-NEXT: vpextrb $15, %xmm0, 31(%rdi)
+; AVX2-NEXT: vpextrb $15, %xmm1, 31(%rdi)
; AVX2-NEXT: LBB16_64: ## %else62
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -4522,18 +4440,17 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512F-NEXT: je LBB16_34
; AVX512F-NEXT: ## %bb.33: ## %cond.store31
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $0, %xmm2, 16(%rdi)
+; AVX512F-NEXT: vpextrb $0, %xmm1, 16(%rdi)
; AVX512F-NEXT: LBB16_34: ## %else32
; AVX512F-NEXT: kshiftrw $1, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_36
; AVX512F-NEXT: ## %bb.35: ## %cond.store33
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $1, %xmm2, 17(%rdi)
+; AVX512F-NEXT: vpextrb $1, %xmm1, 17(%rdi)
; AVX512F-NEXT: LBB16_36: ## %else34
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4545,16 +4462,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_38
; AVX512F-NEXT: ## %bb.37: ## %cond.store35
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $2, %xmm2, 18(%rdi)
+; AVX512F-NEXT: vpextrb $2, %xmm1, 18(%rdi)
; AVX512F-NEXT: LBB16_38: ## %else36
; AVX512F-NEXT: kshiftrw $3, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_40
; AVX512F-NEXT: ## %bb.39: ## %cond.store37
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $3, %xmm2, 19(%rdi)
+; AVX512F-NEXT: vpextrb $3, %xmm1, 19(%rdi)
; AVX512F-NEXT: LBB16_40: ## %else38
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4566,16 +4481,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_42
; AVX512F-NEXT: ## %bb.41: ## %cond.store39
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $4, %xmm2, 20(%rdi)
+; AVX512F-NEXT: vpextrb $4, %xmm1, 20(%rdi)
; AVX512F-NEXT: LBB16_42: ## %else40
; AVX512F-NEXT: kshiftrw $5, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_44
; AVX512F-NEXT: ## %bb.43: ## %cond.store41
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $5, %xmm2, 21(%rdi)
+; AVX512F-NEXT: vpextrb $5, %xmm1, 21(%rdi)
; AVX512F-NEXT: LBB16_44: ## %else42
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4587,16 +4500,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_46
; AVX512F-NEXT: ## %bb.45: ## %cond.store43
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $6, %xmm2, 22(%rdi)
+; AVX512F-NEXT: vpextrb $6, %xmm1, 22(%rdi)
; AVX512F-NEXT: LBB16_46: ## %else44
; AVX512F-NEXT: kshiftrw $7, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_48
; AVX512F-NEXT: ## %bb.47: ## %cond.store45
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $7, %xmm2, 23(%rdi)
+; AVX512F-NEXT: vpextrb $7, %xmm1, 23(%rdi)
; AVX512F-NEXT: LBB16_48: ## %else46
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4608,16 +4519,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_50
; AVX512F-NEXT: ## %bb.49: ## %cond.store47
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $8, %xmm2, 24(%rdi)
+; AVX512F-NEXT: vpextrb $8, %xmm1, 24(%rdi)
; AVX512F-NEXT: LBB16_50: ## %else48
; AVX512F-NEXT: kshiftrw $9, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_52
; AVX512F-NEXT: ## %bb.51: ## %cond.store49
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $9, %xmm2, 25(%rdi)
+; AVX512F-NEXT: vpextrb $9, %xmm1, 25(%rdi)
; AVX512F-NEXT: LBB16_52: ## %else50
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4629,16 +4538,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_54
; AVX512F-NEXT: ## %bb.53: ## %cond.store51
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $10, %xmm2, 26(%rdi)
+; AVX512F-NEXT: vpextrb $10, %xmm1, 26(%rdi)
; AVX512F-NEXT: LBB16_54: ## %else52
; AVX512F-NEXT: kshiftrw $11, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_56
; AVX512F-NEXT: ## %bb.55: ## %cond.store53
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $11, %xmm2, 27(%rdi)
+; AVX512F-NEXT: vpextrb $11, %xmm1, 27(%rdi)
; AVX512F-NEXT: LBB16_56: ## %else54
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4650,16 +4557,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_58
; AVX512F-NEXT: ## %bb.57: ## %cond.store55
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $12, %xmm2, 28(%rdi)
+; AVX512F-NEXT: vpextrb $12, %xmm1, 28(%rdi)
; AVX512F-NEXT: LBB16_58: ## %else56
; AVX512F-NEXT: kshiftrw $13, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_60
; AVX512F-NEXT: ## %bb.59: ## %cond.store57
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpextrb $13, %xmm2, 29(%rdi)
+; AVX512F-NEXT: vpextrb $13, %xmm1, 29(%rdi)
; AVX512F-NEXT: LBB16_60: ## %else58
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
@@ -4671,16 +4576,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_62
; AVX512F-NEXT: ## %bb.61: ## %cond.store59
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512F-NEXT: vpextrb $14, %xmm0, 30(%rdi)
+; AVX512F-NEXT: vpextrb $14, %xmm1, 30(%rdi)
; AVX512F-NEXT: LBB16_62: ## %else60
; AVX512F-NEXT: kshiftrw $15, %k0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: je LBB16_64
; AVX512F-NEXT: ## %bb.63: ## %cond.store61
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512F-NEXT: vpextrb $15, %xmm0, 31(%rdi)
+; AVX512F-NEXT: vpextrb $15, %xmm1, 31(%rdi)
; AVX512F-NEXT: LBB16_64: ## %else62
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@@ -4853,18 +4756,17 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512VLDQ-NEXT: vpmovd2m %zmm2, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
+; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512VLDQ-NEXT: je LBB16_34
; AVX512VLDQ-NEXT: ## %bb.33: ## %cond.store31
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $0, %xmm2, 16(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $0, %xmm1, 16(%rdi)
; AVX512VLDQ-NEXT: LBB16_34: ## %else32
; AVX512VLDQ-NEXT: kshiftrw $1, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_36
; AVX512VLDQ-NEXT: ## %bb.35: ## %cond.store33
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $1, %xmm2, 17(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $1, %xmm1, 17(%rdi)
; AVX512VLDQ-NEXT: LBB16_36: ## %else34
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4876,16 +4778,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_38
; AVX512VLDQ-NEXT: ## %bb.37: ## %cond.store35
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $2, %xmm2, 18(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $2, %xmm1, 18(%rdi)
; AVX512VLDQ-NEXT: LBB16_38: ## %else36
; AVX512VLDQ-NEXT: kshiftrw $3, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_40
; AVX512VLDQ-NEXT: ## %bb.39: ## %cond.store37
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $3, %xmm2, 19(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $3, %xmm1, 19(%rdi)
; AVX512VLDQ-NEXT: LBB16_40: ## %else38
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4897,16 +4797,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_42
; AVX512VLDQ-NEXT: ## %bb.41: ## %cond.store39
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $4, %xmm2, 20(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $4, %xmm1, 20(%rdi)
; AVX512VLDQ-NEXT: LBB16_42: ## %else40
; AVX512VLDQ-NEXT: kshiftrw $5, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_44
; AVX512VLDQ-NEXT: ## %bb.43: ## %cond.store41
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $5, %xmm2, 21(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $5, %xmm1, 21(%rdi)
; AVX512VLDQ-NEXT: LBB16_44: ## %else42
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4918,16 +4816,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_46
; AVX512VLDQ-NEXT: ## %bb.45: ## %cond.store43
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $6, %xmm2, 22(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $6, %xmm1, 22(%rdi)
; AVX512VLDQ-NEXT: LBB16_46: ## %else44
; AVX512VLDQ-NEXT: kshiftrw $7, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_48
; AVX512VLDQ-NEXT: ## %bb.47: ## %cond.store45
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $7, %xmm2, 23(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $7, %xmm1, 23(%rdi)
; AVX512VLDQ-NEXT: LBB16_48: ## %else46
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4939,16 +4835,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_50
; AVX512VLDQ-NEXT: ## %bb.49: ## %cond.store47
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $8, %xmm2, 24(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $8, %xmm1, 24(%rdi)
; AVX512VLDQ-NEXT: LBB16_50: ## %else48
; AVX512VLDQ-NEXT: kshiftrw $9, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_52
; AVX512VLDQ-NEXT: ## %bb.51: ## %cond.store49
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $9, %xmm2, 25(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $9, %xmm1, 25(%rdi)
; AVX512VLDQ-NEXT: LBB16_52: ## %else50
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4960,16 +4854,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_54
; AVX512VLDQ-NEXT: ## %bb.53: ## %cond.store51
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $10, %xmm2, 26(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $10, %xmm1, 26(%rdi)
; AVX512VLDQ-NEXT: LBB16_54: ## %else52
; AVX512VLDQ-NEXT: kshiftrw $11, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_56
; AVX512VLDQ-NEXT: ## %bb.55: ## %cond.store53
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $11, %xmm2, 27(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $11, %xmm1, 27(%rdi)
; AVX512VLDQ-NEXT: LBB16_56: ## %else54
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm2
@@ -4981,16 +4873,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_58
; AVX512VLDQ-NEXT: ## %bb.57: ## %cond.store55
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $12, %xmm2, 28(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $12, %xmm1, 28(%rdi)
; AVX512VLDQ-NEXT: LBB16_58: ## %else56
; AVX512VLDQ-NEXT: kshiftrw $13, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_60
; AVX512VLDQ-NEXT: ## %bb.59: ## %cond.store57
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512VLDQ-NEXT: vpextrb $13, %xmm2, 29(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $13, %xmm1, 29(%rdi)
; AVX512VLDQ-NEXT: LBB16_60: ## %else58
; AVX512VLDQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VLDQ-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
@@ -5002,16 +4892,14 @@ define void @store_v32i8_v32i8(<32 x i8> %trigger, <32 x i8>* %addr, <32 x i8> %
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_62
; AVX512VLDQ-NEXT: ## %bb.61: ## %cond.store59
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512VLDQ-NEXT: vpextrb $14, %xmm0, 30(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $14, %xmm1, 30(%rdi)
; AVX512VLDQ-NEXT: LBB16_62: ## %else60
; AVX512VLDQ-NEXT: kshiftrw $15, %k0, %k0
; AVX512VLDQ-NEXT: kmovw %k0, %eax
; AVX512VLDQ-NEXT: testb $1, %al
; AVX512VLDQ-NEXT: je LBB16_64
; AVX512VLDQ-NEXT: ## %bb.63: ## %cond.store61
-; AVX512VLDQ-NEXT: vextracti128 $1, %ymm1, %xmm0
-; AVX512VLDQ-NEXT: vpextrb $15, %xmm0, 31(%rdi)
+; AVX512VLDQ-NEXT: vpextrb $15, %xmm1, 31(%rdi)
; AVX512VLDQ-NEXT: LBB16_64: ## %else62
; AVX512VLDQ-NEXT: vzeroupper
; AVX512VLDQ-NEXT: retq
OpenPOWER on IntegriCloud