Diffstat (limited to 'llvm/test/CodeGen/X86/masked_gather_scatter.ll')
-rw-r--r--  llvm/test/CodeGen/X86/masked_gather_scatter.ll | 426
1 file changed, 310 insertions(+), 116 deletions(-)
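The assertions in the updated test below were autogenerated by utils/update_llc_test_checks.py (see the NOTE line in the first hunk). A minimal sketch of regenerating them, assuming an in-tree build with llc at build/bin/llc and the script's --llc-binary flag (paths and flag spelling are assumptions, not taken from this diff):

  # Rewrite the CHECK lines in the test in place from fresh llc output:
  python utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
      llvm/test/CodeGen/X86/masked_gather_scatter.ll

The script reruns each RUN line's llc invocation and emits one CHECK block per prefix (KNL_64, KNL_32, SKX, SKX_32), which is consistent with this change adding the previously missing SKX_32 blocks and expanding the abbreviated "# kill" comments.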
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 85cfc6234fb..61dc7c7724e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=KNL_32
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=SKX
@@ -38,6 +39,14 @@ define <16 x float> @test1(float* %base, <16 x i32> %ind) {
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test1:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
@@ -87,6 +96,14 @@ define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test2:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
@@ -120,6 +137,14 @@ define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test3:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
@@ -163,6 +188,17 @@ define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
; SKX-NEXT: vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test4:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: kmovw %k1, %k2
+; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
+; SKX_32-NEXT: vmovdqa64 %zmm1, %zmm2
+; SKX_32-NEXT: vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
+; SKX_32-NEXT: vpaddd %zmm2, %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
@@ -215,6 +251,15 @@ define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) {
; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2}
; SKX-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test5:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: kmovw %k1, %k2
+; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2}
+; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
@@ -267,6 +312,15 @@ define <8 x i32> @test6(<8 x i32>%a1, <8 x i32*> %ptr) {
; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; SKX-NEXT: vmovdqa64 %ymm2, %ymm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test6:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k2
+; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm2 {%k2}
+; SKX_32-NEXT: vpscatterdd %ymm0, (,%ymm1) {%k1}
+; SKX_32-NEXT: vmovdqa64 %ymm2, %ymm0
+; SKX_32-NEXT: retl
%a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
@@ -309,6 +363,17 @@ define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
; SKX-NEXT: vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1}
; SKX-NEXT: vpaddd %ymm2, %ymm1, %ymm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test7:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kmovb {{[0-9]+}}(%esp), %k1
+; SKX_32-NEXT: kmovw %k1, %k2
+; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm1 {%k2}
+; SKX_32-NEXT: vmovdqa64 %ymm1, %ymm2
+; SKX_32-NEXT: vpgatherdd (%eax,%ymm0,4), %ymm2 {%k1}
+; SKX_32-NEXT: vpaddd %ymm2, %ymm1, %ymm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer
@@ -415,13 +480,13 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
; KNL_32-LABEL: test9:
; KNL_32: # BB#0: # %entry
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2
-; KNL_32-NEXT: vpbroadcastd .LCPI8_0, %ymm3
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3
; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1
; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
-; KNL_32-NEXT: vpbroadcastd .LCPI8_1, %ymm3
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3
; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; KNL_32-NEXT: vpbroadcastd .LCPI8_2, %ymm1
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
@@ -441,6 +506,18 @@ define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32>%ind5) {
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test9:
+; SKX_32: # BB#0: # %entry
+; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1
+; SKX_32-NEXT: vpmovqd %zmm0, %ymm0
+; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1}
+; SKX_32-NEXT: retl
entry:
%broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
%broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
@@ -477,13 +554,13 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
; KNL_32-LABEL: test10:
; KNL_32: # BB#0: # %entry
; KNL_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm2
-; KNL_32-NEXT: vpbroadcastd .LCPI9_0, %ymm3
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3
; KNL_32-NEXT: vpmulld %ymm3, %ymm1, %ymm1
; KNL_32-NEXT: vpmovqd %zmm0, %ymm0
-; KNL_32-NEXT: vpbroadcastd .LCPI9_1, %ymm3
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm3
; KNL_32-NEXT: vpmulld %ymm3, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; KNL_32-NEXT: vpbroadcastd .LCPI9_2, %ymm1
+; KNL_32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1
; KNL_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT: vpaddd %ymm0, %ymm2, %ymm0
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm1
@@ -503,6 +580,18 @@ define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32>%ind5) {
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test10:
+; SKX_32: # BB#0: # %entry
+; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1
+; SKX_32-NEXT: vpmovqd %zmm0, %ymm0
+; SKX_32-NEXT: vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; SKX_32-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vpgatherdd (,%ymm1), %ymm0 {%k1}
+; SKX_32-NEXT: retl
entry:
%broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
%broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
@@ -535,6 +624,14 @@ define <16 x float> @test11(float* %base, i32 %ind) {
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test11:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %zmm1
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
@@ -568,6 +665,14 @@ define <16 x float> @test12(float* %base, <16 x i32> %ind) {
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test12:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: kxnorw %k0, %k0, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
@@ -596,6 +701,13 @@ define <16 x float> @test13(float* %base, <16 x i32> %ind) {
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test13:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%sext_ind = sext <16 x i32> %ind to <16 x i64>
%gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
@@ -675,25 +787,29 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
;
; KNL_64-LABEL: test15:
; KNL_64: # BB#0:
-; KNL_64: vpxor %ymm2, %ymm2, %ymm2
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_64-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_64-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_64-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm2,4), %ymm0 {%k1}
-; KNL_64-NEXT: # kill
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test15:
; KNL_32: # BB#0:
-; KNL_32: vpxor %ymm2, %ymm2, %ymm2
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_32-NEXT: vpxor %ymm2, %ymm2, %ymm2
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7]
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm2
; KNL_32-NEXT: vpslld $31, %ymm1, %ymm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm2,4), %ymm0 {%k1}
-; KNL_32-NEXT: # kill
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_32-NEXT: retl
;
; SKX-LABEL: test15:
@@ -724,7 +840,9 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
;
; KNL_64-LABEL: test16:
; KNL_64: # BB#0:
-; KNL_64: vpslld $31, %xmm1, %xmm1
+; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
@@ -738,14 +856,16 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x
;
; KNL_32-LABEL: test16:
; KNL_32: # BB#0:
-; KNL_32: vpslld $31, %xmm1, %xmm1
+; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpmovsxdq %ymm0, %zmm0
-; KNL_32-NEXT: vpsllvq .LCPI15_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovapd %zmm2, %zmm0
@@ -778,7 +898,9 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
;
; KNL_64-LABEL: test17:
; KNL_64: # BB#0:
-; KNL_64: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
@@ -788,10 +910,12 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x
;
; KNL_32-LABEL: test17:
; KNL_32: # BB#0:
-; KNL_32: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllvq .LCPI16_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vgatherqpd (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovapd %zmm2, %zmm0
@@ -830,7 +954,10 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
;
; KNL_64-LABEL: test18:
; KNL_64: # BB#0:
-; KNL_64: vpxor %ymm3, %ymm3, %ymm3
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_64-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_64-NEXT: vpslld $31, %ymm2, %ymm2
; KNL_64-NEXT: vptestmd %zmm2, %zmm2, %k1
@@ -839,7 +966,10 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
;
; KNL_32-LABEL: test18:
; KNL_32: # BB#0:
-; KNL_32: vpxor %ymm3, %ymm3, %ymm3
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
; KNL_32-NEXT: vpmovsxdq %ymm1, %zmm1
; KNL_32-NEXT: vpslld $31, %ymm2, %ymm2
@@ -868,7 +998,9 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
;
; KNL_64-LABEL: test19:
; KNL_64: # BB#0:
-; KNL_64: vpslld $31, %xmm1, %xmm1
+; KNL_64-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
@@ -880,13 +1012,15 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind
;
; KNL_32-LABEL: test19:
; KNL_32: # BB#0:
-; KNL_32: vpslld $31, %xmm1, %xmm1
+; KNL_32-NEXT: # kill: %YMM2<def> %YMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL_32-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti64x4 $0, %ymm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllvq .LCPI18_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1}
; KNL_32-NEXT: retl
@@ -915,7 +1049,9 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; KNL_64-LABEL: test20:
; KNL_64: # BB#0:
-; KNL_64: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_64-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
@@ -926,7 +1062,8 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; KNL_32-LABEL: test20:
; KNL_32: # BB#0:
-; KNL_32: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; KNL_32-NEXT: vmovq {{.*#+}} xmm2 = xmm2[0],zero
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5,6,7]
@@ -939,7 +1076,8 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) {
;
; SKX-LABEL: test20:
; SKX: # BB#0:
-; SKX: vpsllq $63, %xmm2, %xmm2
+; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
@@ -964,7 +1102,8 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; KNL_64-LABEL: test21:
; KNL_64: # BB#0:
-; KNL_64: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: vpsllq $63, %zmm2, %zmm2
@@ -974,17 +1113,19 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; KNL_32-LABEL: test21:
; KNL_32: # BB#0:
-; KNL_32: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm2, %zmm3, %zmm2
; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; KNL_32-NEXT: vpsllvq .LCPI20_0, %zmm2, %zmm2
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test21:
; SKX: # BB#0:
-; SKX: vpsllq $63, %xmm2, %xmm2
+; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
@@ -994,7 +1135,8 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
;
; SKX_32-LABEL: test21:
; SKX_32: # BB#0:
-; SKX_32: vpsllq $63, %xmm2, %xmm2
+; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k0
; SKX_32-NEXT: kshiftlb $6, %k0, %k0
; SKX_32-NEXT: kshiftrb $6, %k0, %k1
@@ -1013,7 +1155,8 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
;
; KNL_64-LABEL: test22:
; KNL_64: # BB#0:
-; KNL_64: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_64-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
; KNL_64-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_64-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
@@ -1027,7 +1170,8 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl
;
; KNL_32-LABEL: test22:
; KNL_32: # BB#0:
-; KNL_32: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %YMM2<def>
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; KNL_32-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero
; KNL_32-NEXT: vpxor %ymm3, %ymm3, %ymm3
; KNL_32-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm3[4,5,6,7]
@@ -1075,7 +1219,9 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
;
; KNL_64-LABEL: test23:
; KNL_64: # BB#0:
-; KNL_64: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
@@ -1085,10 +1231,12 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
;
; KNL_32-LABEL: test23:
; KNL_32: # BB#0:
-; KNL_32: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllvq .LCPI22_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -1119,7 +1267,8 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %
define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64: # BB#0:
-; KNL_64: movb $3, %al
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
@@ -1127,10 +1276,11 @@ define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
;
; KNL_32-LABEL: test24:
; KNL_32: # BB#0:
-; KNL_32: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxord %zmm1, %zmm1, %zmm1
-; KNL_32-NEXT: vinserti32x4 $0, .LCPI23_0, %zmm1, %zmm1
-; KNL_32-NEXT: vpsllvq .LCPI23_1, %zmm1, %zmm1
+; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
@@ -1160,7 +1310,9 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
;
; KNL_64-LABEL: test25:
; KNL_64: # BB#0:
-; KNL_64: vpxord %zmm3, %zmm3, %zmm3
+; KNL_64-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_64-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_64-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL_64-NEXT: vptestmq %zmm1, %zmm1, %k1
@@ -1170,10 +1322,12 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %
;
; KNL_32-LABEL: test25:
; KNL_32: # BB#0:
-; KNL_32: vpxord %zmm3, %zmm3, %zmm3
+; KNL_32-NEXT: # kill: %XMM2<def> %XMM2<kill> %ZMM2<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: vpxord %zmm3, %zmm3, %zmm3
; KNL_32-NEXT: vinserti32x4 $0, %xmm1, %zmm3, %zmm1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vpsllvq .LCPI24_0, %zmm1, %zmm1
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm1, %zmm1
; KNL_32-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm2 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm2, %zmm0
@@ -1205,7 +1359,9 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
;
; KNL_64-LABEL: test26:
; KNL_64: # BB#0:
-; KNL_64: movb $3, %al
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpgatherqq (%rdi,%zmm0,8), %zmm1 {%k1}
; KNL_64-NEXT: vmovdqa64 %zmm1, %zmm0
@@ -1213,10 +1369,12 @@ define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
;
; KNL_32-LABEL: test26:
; KNL_32: # BB#0:
-; KNL_32: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
-; KNL_32-NEXT: vinserti32x4 $0, .LCPI25_0, %zmm2, %zmm2
-; KNL_32-NEXT: vpsllvq .LCPI25_1, %zmm2, %zmm2
+; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpgatherqq (%eax,%zmm0,8), %zmm1 {%k1}
; KNL_32-NEXT: vmovdqa64 %zmm1, %zmm0
@@ -1252,7 +1410,7 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vgatherqps (%rdi,%zmm1,4), %ymm0 {%k1}
-; KNL_64-NEXT: # kill
+; KNL_64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test27:
@@ -1263,7 +1421,7 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_32-NEXT: movb $3, %cl
; KNL_32-NEXT: kmovw %ecx, %k1
; KNL_32-NEXT: vgatherqps (%eax,%zmm1,4), %ymm0 {%k1}
-; KNL_32-NEXT: # kill
+; KNL_32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL_32-NEXT: retl
;
; SKX-LABEL: test27:
@@ -1273,6 +1431,15 @@ define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test27:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: movb $3, %cl
+; SKX_32-NEXT: kmovb %ecx, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%xmm1,4), %xmm0 {%k1}
+; SKX_32-NEXT: retl
%sext_ind = sext <2 x i32> %ind to <2 x i64>
%gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
%res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
@@ -1285,7 +1452,8 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; KNL_64-LABEL: test28:
; KNL_64: # BB#0:
-; KNL_64: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_64-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_64-NEXT: movb $3, %al
; KNL_64-NEXT: kmovw %eax, %k1
; KNL_64-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
@@ -1293,17 +1461,19 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; KNL_32-LABEL: test28:
; KNL_32: # BB#0:
-; KNL_32: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
+; KNL_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; KNL_32-NEXT: vpxord %zmm2, %zmm2, %zmm2
-; KNL_32-NEXT: vinserti32x4 $0, .LCPI27_0, %zmm2, %zmm2
-; KNL_32-NEXT: vpsllvq .LCPI27_1, %zmm2, %zmm2
+; KNL_32-NEXT: vinserti32x4 $0, {{\.LCPI.*}}, %zmm2, %zmm2
+; KNL_32-NEXT: vpsllvq {{\.LCPI.*}}, %zmm2, %zmm2
; KNL_32-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL_32-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_32-NEXT: retl
;
; SKX-LABEL: test28:
; SKX: # BB#0:
-; SKX: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX-NEXT: movb $3, %al
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
@@ -1311,7 +1481,8 @@ define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
;
; SKX_32-LABEL: test28:
; SKX_32: # BB#0:
-; SKX_32: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX_32-NEXT: # kill: %XMM1<def> %XMM1<kill> %YMM1<def>
+; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SKX_32-NEXT: movb $3, %al
; SKX_32-NEXT: kmovb %eax, %k1
; SKX_32-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1}
@@ -1353,6 +1524,15 @@ define <16 x float> @test29(float* %base, <16 x i32> %ind) {
; SKX-NEXT: vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT: vmovaps %zmm1, %zmm0
; SKX-NEXT: retq
+;
+; SKX_32-LABEL: test29:
+; SKX_32: # BB#0:
+; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SKX_32-NEXT: movw $44, %cx
+; SKX_32-NEXT: kmovw %ecx, %k1
+; SKX_32-NEXT: vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
+; SKX_32-NEXT: vmovaps %zmm1, %zmm0
+; SKX_32-NEXT: retl
%broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
%broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer
@@ -1370,9 +1550,12 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; KNL_64-LABEL: test30:
; KNL_64: # BB#0:
; KNL_64-NEXT: andl $1, %edx
+; KNL_64-NEXT: kmovw %edx, %k1
; KNL_64-NEXT: andl $1, %esi
+; KNL_64-NEXT: kmovw %esi, %k2
; KNL_64-NEXT: movl %edi, %eax
; KNL_64-NEXT: andl $1, %eax
+; KNL_64-NEXT: kmovw %eax, %k0
; KNL_64-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT: vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT: vpaddq %ymm1, %ymm0, %ymm1
@@ -1380,76 +1563,81 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; KNL_64-NEXT: testb $1, %dil
; KNL_64-NEXT: je .LBB29_2
; KNL_64-NEXT: # BB#1: # %cond.load
-; KNL_64-NEXT: vmovq %xmm1, %rcx
+; KNL_64-NEXT: vmovq %xmm1, %rax
; KNL_64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_64-NEXT: .LBB29_2: # %else
-; KNL_64-NEXT: testb %sil, %sil
+; KNL_64-NEXT: kmovw %k2, %eax
+; KNL_64-NEXT: movl %eax, %ecx
+; KNL_64-NEXT: andl $1, %ecx
+; KNL_64-NEXT: testb %cl, %cl
; KNL_64-NEXT: je .LBB29_4
; KNL_64-NEXT: # BB#3: # %cond.load1
; KNL_64-NEXT: vpextrq $1, %xmm1, %rcx
; KNL_64-NEXT: vpinsrd $1, (%rcx), %xmm0, %xmm0
; KNL_64-NEXT: .LBB29_4: # %else2
+; KNL_64-NEXT: kmovw %k1, %ecx
+; KNL_64-NEXT: movl %ecx, %edx
+; KNL_64-NEXT: andl $1, %edx
; KNL_64-NEXT: testb %dl, %dl
; KNL_64-NEXT: je .LBB29_6
; KNL_64-NEXT: # BB#5: # %cond.load4
; KNL_64-NEXT: vextracti128 $1, %ymm1, %xmm1
-; KNL_64-NEXT: vmovq %xmm1, %rcx
-; KNL_64-NEXT: vpinsrd $2, (%rcx), %xmm0, %xmm0
+; KNL_64-NEXT: vmovq %xmm1, %rdx
+; KNL_64-NEXT: vpinsrd $2, (%rdx), %xmm0, %xmm0
; KNL_64-NEXT: .LBB29_6: # %else5
-; KNL_64-NEXT: vmovd %eax, %xmm1
-; KNL_64-NEXT: vpinsrd $1, %esi, %xmm1, %xmm1
-; KNL_64-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
+; KNL_64-NEXT: kmovw %k0, %edx
+; KNL_64-NEXT: vmovd %edx, %xmm1
+; KNL_64-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; KNL_64-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; KNL_64-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test30:
; KNL_32: # BB#0:
-; KNL_32-NEXT: pushl %ebx
-; KNL_32-NEXT: .Ltmp0:
-; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: pushl %esi
-; KNL_32-NEXT: .Ltmp1:
-; KNL_32-NEXT: .cfi_def_cfa_offset 12
-; KNL_32-NEXT: .Ltmp2:
-; KNL_32-NEXT: .cfi_offset %esi, -12
-; KNL_32-NEXT: .Ltmp3:
-; KNL_32-NEXT: .cfi_offset %ebx, -8
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT: andl $1, %eax
-; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; KNL_32-NEXT: kmovw %eax, %k1
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: andl $1, %eax
+; KNL_32-NEXT: kmovw %eax, %k2
+; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; KNL_32-NEXT: movl %eax, %ecx
; KNL_32-NEXT: andl $1, %ecx
-; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; KNL_32-NEXT: movl %ebx, %edx
-; KNL_32-NEXT: andl $1, %edx
+; KNL_32-NEXT: kmovw %ecx, %k0
; KNL_32-NEXT: vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; KNL_32-NEXT: # implicit-def: %XMM0
-; KNL_32-NEXT: testb $1, %bl
+; KNL_32-NEXT: testb $1, %al
; KNL_32-NEXT: je .LBB29_2
; KNL_32-NEXT: # BB#1: # %cond.load
-; KNL_32-NEXT: vmovd %xmm1, %esi
+; KNL_32-NEXT: vmovd %xmm1, %eax
; KNL_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_32-NEXT: .LBB29_2: # %else
+; KNL_32-NEXT: kmovw %k2, %eax
+; KNL_32-NEXT: movl %eax, %ecx
+; KNL_32-NEXT: andl $1, %ecx
; KNL_32-NEXT: testb %cl, %cl
; KNL_32-NEXT: je .LBB29_4
; KNL_32-NEXT: # BB#3: # %cond.load1
-; KNL_32-NEXT: vpextrd $1, %xmm1, %esi
-; KNL_32-NEXT: vpinsrd $1, (%esi), %xmm0, %xmm0
+; KNL_32-NEXT: vpextrd $1, %xmm1, %ecx
+; KNL_32-NEXT: vpinsrd $1, (%ecx), %xmm0, %xmm0
; KNL_32-NEXT: .LBB29_4: # %else2
-; KNL_32-NEXT: testb %al, %al
+; KNL_32-NEXT: kmovw %k1, %ecx
+; KNL_32-NEXT: movl %ecx, %edx
+; KNL_32-NEXT: andl $1, %edx
+; KNL_32-NEXT: testb %dl, %dl
; KNL_32-NEXT: je .LBB29_6
; KNL_32-NEXT: # BB#5: # %cond.load4
-; KNL_32-NEXT: vpextrd $2, %xmm1, %esi
-; KNL_32-NEXT: vpinsrd $2, (%esi), %xmm0, %xmm0
+; KNL_32-NEXT: vpextrd $2, %xmm1, %edx
+; KNL_32-NEXT: vpinsrd $2, (%edx), %xmm0, %xmm0
; KNL_32-NEXT: .LBB29_6: # %else5
+; KNL_32-NEXT: kmovw %k0, %edx
; KNL_32-NEXT: vmovd %edx, %xmm1
-; KNL_32-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; KNL_32-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
+; KNL_32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
+; KNL_32-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; KNL_32-NEXT: vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
-; KNL_32-NEXT: popl %esi
-; KNL_32-NEXT: popl %ebx
; KNL_32-NEXT: retl
;
; SKX-LABEL: test30:
@@ -1460,35 +1648,38 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: vpmovsxdq %xmm1, %ymm1
; SKX-NEXT: vpsllq $2, %ymm1, %ymm1
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm1
+; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT: kmovw %k0, %eax
-; SKX-NEXT: # implicit-def: %XMM0
+; SKX-NEXT: andl $1, %eax
+; SKX-NEXT: # implicit-def: %XMM1
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_2
; SKX-NEXT: # BB#1: # %cond.load
-; SKX-NEXT: vmovq %xmm1, %rax
-; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX-NEXT: .LBB29_2: # %else
; SKX-NEXT: kshiftlw $14, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_4
; SKX-NEXT: # BB#3: # %cond.load1
-; SKX-NEXT: vpextrq $1, %xmm1, %rax
-; SKX-NEXT: vpinsrd $1, (%rax), %xmm0, %xmm0
+; SKX-NEXT: vpextrq $1, %xmm0, %rax
+; SKX-NEXT: vpinsrd $1, (%rax), %xmm1, %xmm1
; SKX-NEXT: .LBB29_4: # %else2
; SKX-NEXT: kshiftlw $13, %k1, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_6
; SKX-NEXT: # BB#5: # %cond.load4
-; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm1
-; SKX-NEXT: vmovq %xmm1, %rax
-; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
+; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0
+; SKX-NEXT: vmovq %xmm0, %rax
+; SKX-NEXT: vpinsrd $2, (%rax), %xmm1, %xmm1
; SKX-NEXT: .LBB29_6: # %else5
-; SKX-NEXT: vpblendmd %xmm0, %xmm3, %xmm0 {%k1}
+; SKX-NEXT: vpblendmd %xmm1, %xmm3, %xmm0 {%k1}
; SKX-NEXT: retq
;
; SKX_32-LABEL: test30:
@@ -1501,35 +1692,38 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX_32-NEXT: kshiftlw $15, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1
-; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; SKX_32-NEXT: kmovw %k0, %eax
-; SKX_32-NEXT: # implicit-def: %XMM0
+; SKX_32-NEXT: andl $1, %eax
+; SKX_32-NEXT: # implicit-def: %XMM1
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_2
; SKX_32-NEXT: # BB#1: # %cond.load
-; SKX_32-NEXT: vmovd %xmm1, %eax
-; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SKX_32-NEXT: vmovd %xmm0, %eax
+; SKX_32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SKX_32-NEXT: .LBB29_2: # %else
; SKX_32-NEXT: kshiftlw $14, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
+; SKX_32-NEXT: andl $1, %eax
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_4
; SKX_32-NEXT: # BB#3: # %cond.load1
-; SKX_32-NEXT: vpextrd $1, %xmm1, %eax
-; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm0, %xmm0
+; SKX_32-NEXT: vpextrd $1, %xmm0, %eax
+; SKX_32-NEXT: vpinsrd $1, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: .LBB29_4: # %else2
; SKX_32-NEXT: vmovdqa32 {{[0-9]+}}(%esp), %xmm2
; SKX_32-NEXT: kshiftlw $13, %k1, %k0
; SKX_32-NEXT: kshiftrw $15, %k0, %k0
; SKX_32-NEXT: kmovw %k0, %eax
+; SKX_32-NEXT: andl $1, %eax
; SKX_32-NEXT: testb %al, %al
; SKX_32-NEXT: je .LBB29_6
; SKX_32-NEXT: # BB#5: # %cond.load4
-; SKX_32-NEXT: vpextrd $2, %xmm1, %eax
-; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm0, %xmm0
+; SKX_32-NEXT: vpextrd $2, %xmm0, %eax
+; SKX_32-NEXT: vpinsrd $2, (%eax), %xmm1, %xmm1
; SKX_32-NEXT: .LBB29_6: # %else5
-; SKX_32-NEXT: vpblendmd %xmm0, %xmm2, %xmm0 {%k1}
+; SKX_32-NEXT: vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
; SKX_32-NEXT: addl $12, %esp
; SKX_32-NEXT: retl
@@ -1646,12 +1840,12 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i
; KNL_32-LABEL: test_gather_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp4:
+; KNL_32-NEXT: .Ltmp0:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp5:
+; KNL_32-NEXT: .Ltmp1:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp6:
+; KNL_32-NEXT: .Ltmp2:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -1769,12 +1963,12 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
; KNL_32-LABEL: test_gather_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp7:
+; KNL_32-NEXT: .Ltmp3:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp8:
+; KNL_32-NEXT: .Ltmp4:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp9:
+; KNL_32-NEXT: .Ltmp5:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -1886,12 +2080,12 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %
; KNL_32-LABEL: test_scatter_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp10:
+; KNL_32-NEXT: .Ltmp6:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp11:
+; KNL_32-NEXT: .Ltmp7:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp12:
+; KNL_32-NEXT: .Ltmp8:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -2000,12 +2194,12 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
; KNL_32-LABEL: test_scatter_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Ltmp13:
+; KNL_32-NEXT: .Ltmp9:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Ltmp14:
+; KNL_32-NEXT: .Ltmp10:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Ltmp15:
+; KNL_32-NEXT: .Ltmp11:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp