diff options
| author | Ayman Musa <ayman.musa@intel.com> | 2017-09-04 09:31:32 +0000 |
|---|---|---|
| committer | Ayman Musa <ayman.musa@intel.com> | 2017-09-04 09:31:32 +0000 |
| commit | 5defce3986887f1b2aaffdf835a542b877708d6c (patch) | |
| tree | a862e5255d14b7b272c54e41076d0fb1aa8cf6fa /llvm/test | |
| parent | 2661ae48c70252b311a5b809df945f1ca3681aad (diff) | |
| download | bcm5719-llvm-5defce3986887f1b2aaffdf835a542b877708d6c.tar.gz bcm5719-llvm-5defce3986887f1b2aaffdf835a542b877708d6c.zip | |
[X86] Replace -mcpu option with -mattr in LIT tests added in https://reviews.llvm.org/rL312442
llvm-svn: 312474
Diffstat (limited to 'llvm/test')
13 files changed, 953 insertions, 952 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-fp.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-fp.ll index 81bf7ea1453..64aa3a2a8a3 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-fp.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-fp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s define <4 x double> @test_double_to_4(double %s) { ; CHECK-LABEL: test_double_to_4: @@ -14,7 +14,7 @@ define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %defa ; CHECK-LABEL: test_masked_double_to_4_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -28,7 +28,7 @@ define <4 x double> @test_masked_z_double_to_4_mask0(double %s) { ; CHECK-LABEL: test_masked_z_double_to_4_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x double> undef, double %s, i32 0 @@ -40,7 +40,7 @@ define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %defa ; CHECK-LABEL: test_masked_double_to_4_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -54,7 +54,7 @@ define <4 x double> @test_masked_z_double_to_4_mask1(double %s) { ; CHECK-LABEL: test_masked_z_double_to_4_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x double> undef, double %s, i32 0 @@ -66,7 +66,7 @@ define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %defa ; CHECK-LABEL: test_masked_double_to_4_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -80,7 +80,7 @@ define <4 x double> @test_masked_z_double_to_4_mask2(double %s) { ; CHECK-LABEL: test_masked_z_double_to_4_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x double> undef, double %s, i32 0 @@ -92,7 +92,7 @@ define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %defa ; CHECK-LABEL: test_masked_double_to_4_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -106,7 +106,7 @@ define <4 x double> @test_masked_z_double_to_4_mask3(double %s) { ; CHECK-LABEL: test_masked_z_double_to_4_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x double> undef, double %s, i32 0 @@ -127,7 +127,7 @@ define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %defa ; CHECK-LABEL: test_masked_double_to_8_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -141,7 +141,7 @@ define <8 x double> @test_masked_z_double_to_8_mask0(double %s) { ; CHECK-LABEL: test_masked_z_double_to_8_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x double> undef, double %s, i32 0 @@ -153,7 +153,7 @@ define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %defa ; CHECK-LABEL: test_masked_double_to_8_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $103, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -167,7 +167,7 @@ define <8 x double> @test_masked_z_double_to_8_mask1(double %s) { ; CHECK-LABEL: test_masked_z_double_to_8_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $103, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x double> undef, double %s, i32 0 @@ -179,7 +179,7 @@ define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %defa ; CHECK-LABEL: test_masked_double_to_8_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-56, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -193,7 +193,7 @@ define <8 x double> @test_masked_z_double_to_8_mask2(double %s) { ; CHECK-LABEL: test_masked_z_double_to_8_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-56, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x double> undef, double %s, i32 0 @@ -205,7 +205,7 @@ define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %defa ; CHECK-LABEL: test_masked_double_to_8_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $78, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -219,7 +219,7 @@ define <8 x double> @test_masked_z_double_to_8_mask3(double %s) { ; CHECK-LABEL: test_masked_z_double_to_8_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $78, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x double> undef, double %s, i32 0 @@ -240,7 +240,7 @@ define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default) ; CHECK-LABEL: test_masked_float_to_4_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -254,7 +254,7 @@ define <4 x float> @test_masked_z_float_to_4_mask0(float %s) { ; CHECK-LABEL: test_masked_z_float_to_4_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -266,7 +266,7 @@ define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default) ; CHECK-LABEL: test_masked_float_to_4_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -280,7 +280,7 @@ define <4 x float> @test_masked_z_float_to_4_mask1(float %s) { ; CHECK-LABEL: test_masked_z_float_to_4_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -292,7 +292,7 @@ define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default) ; CHECK-LABEL: test_masked_float_to_4_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -306,7 +306,7 @@ define <4 x float> @test_masked_z_float_to_4_mask2(float %s) { ; CHECK-LABEL: test_masked_z_float_to_4_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -318,7 +318,7 @@ define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default) ; CHECK-LABEL: test_masked_float_to_4_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -332,7 +332,7 @@ define <4 x float> @test_masked_z_float_to_4_mask3(float %s) { ; CHECK-LABEL: test_masked_z_float_to_4_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -353,7 +353,7 @@ define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default) ; CHECK-LABEL: test_masked_float_to_8_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $72, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -367,7 +367,7 @@ define <8 x float> @test_masked_z_float_to_8_mask0(float %s) { ; CHECK-LABEL: test_masked_z_float_to_8_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $72, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -379,7 +379,7 @@ define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default) ; CHECK-LABEL: test_masked_float_to_8_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -393,7 +393,7 @@ define <8 x float> @test_masked_z_float_to_8_mask1(float %s) { ; CHECK-LABEL: test_masked_z_float_to_8_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -405,7 +405,7 @@ define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default) ; CHECK-LABEL: test_masked_float_to_8_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -419,7 +419,7 @@ define <8 x float> @test_masked_z_float_to_8_mask2(float %s) { ; CHECK-LABEL: test_masked_z_float_to_8_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -431,7 +431,7 @@ define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default) ; CHECK-LABEL: test_masked_float_to_8_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1} ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -445,7 +445,7 @@ define <8 x float> @test_masked_z_float_to_8_mask3(float %s) { ; CHECK-LABEL: test_masked_z_float_to_8_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -466,7 +466,7 @@ define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %defau ; CHECK-LABEL: test_masked_float_to_16_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5916, %ax # imm = 0xE8E4 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -480,7 +480,7 @@ define <16 x float> @test_masked_z_float_to_16_mask0(float %s) { ; CHECK-LABEL: test_masked_z_float_to_16_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5916, %ax # imm = 0xE8E4 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -492,7 +492,7 @@ define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %defau ; CHECK-LABEL: test_masked_float_to_16_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-1130, %ax # imm = 0xFB96 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -506,7 +506,7 @@ define <16 x float> @test_masked_z_float_to_16_mask1(float %s) { ; CHECK-LABEL: test_masked_z_float_to_16_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-1130, %ax # imm = 0xFB96 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -518,7 +518,7 @@ define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %defau ; CHECK-LABEL: test_masked_float_to_16_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12439, %ax # imm = 0xCF69 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -532,7 +532,7 @@ define <16 x float> @test_masked_z_float_to_16_mask2(float %s) { ; CHECK-LABEL: test_masked_z_float_to_16_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12439, %ax # imm = 0xCF69 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -544,7 +544,7 @@ define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %defau ; CHECK-LABEL: test_masked_float_to_16_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6413, %ax # imm = 0xE6F3 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -558,7 +558,7 @@ define <16 x float> @test_masked_z_float_to_16_mask3(float %s) { ; CHECK-LABEL: test_masked_z_float_to_16_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6413, %ax # imm = 0xE6F3 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = insertelement <2 x float> undef, float %s, i32 0 @@ -580,7 +580,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double> ; CHECK-LABEL: test_masked_double_to_4_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq %s = load double, double* %p @@ -594,7 +594,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p) { ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq %s = load double, double* %p @@ -607,7 +607,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double> ; CHECK-LABEL: test_masked_double_to_4_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq %s = load double, double* %p @@ -621,7 +621,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask1(double* %p) { ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq %s = load double, double* %p @@ -634,7 +634,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double> ; CHECK-LABEL: test_masked_double_to_4_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq %s = load double, double* %p @@ -648,7 +648,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p) { ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq %s = load double, double* %p @@ -661,7 +661,7 @@ define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double> ; CHECK-LABEL: test_masked_double_to_4_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq %s = load double, double* %p @@ -675,7 +675,7 @@ define <4 x double> @test_masked_z_double_to_4_mem_mask3(double* %p) { ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq %s = load double, double* %p @@ -698,7 +698,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double> ; CHECK-LABEL: test_masked_double_to_8_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $120, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %s = load double, double* %p @@ -712,7 +712,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p) { ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $120, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load double, double* %p @@ -725,7 +725,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double> ; CHECK-LABEL: test_masked_double_to_8_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %s = load double, double* %p @@ -739,7 +739,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p) { ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load double, double* %p @@ -752,7 +752,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double> ; CHECK-LABEL: test_masked_double_to_8_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $111, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %s = load double, double* %p @@ -766,7 +766,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p) { ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $111, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load double, double* %p @@ -779,7 +779,7 @@ define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double> ; CHECK-LABEL: test_masked_double_to_8_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-100, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %s = load double, double* %p @@ -793,7 +793,7 @@ define <8 x double> @test_masked_z_double_to_8_mem_mask3(double* %p) { ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-100, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load double, double* %p @@ -816,7 +816,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %def ; CHECK-LABEL: test_masked_float_to_4_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -830,7 +830,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -843,7 +843,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %def ; CHECK-LABEL: test_masked_float_to_4_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -857,7 +857,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -870,7 +870,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x float> %def ; CHECK-LABEL: test_masked_float_to_4_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -884,7 +884,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -897,7 +897,7 @@ define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %def ; CHECK-LABEL: test_masked_float_to_4_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -911,7 +911,7 @@ define <4 x float> @test_masked_z_float_to_4_mem_mask3(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -934,7 +934,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %def ; CHECK-LABEL: test_masked_float_to_8_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $67, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -948,7 +948,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $67, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -961,7 +961,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %def ; CHECK-LABEL: test_masked_float_to_8_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-51, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -975,7 +975,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-51, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -988,7 +988,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %def ; CHECK-LABEL: test_masked_float_to_8_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1002,7 +1002,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1015,7 +1015,7 @@ define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %def ; CHECK-LABEL: test_masked_float_to_8_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1029,7 +1029,7 @@ define <8 x float> @test_masked_z_float_to_8_mem_mask3(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1052,7 +1052,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> % ; CHECK-LABEL: test_masked_float_to_16_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-18370, %ax # imm = 0xB83E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1066,7 +1066,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-18370, %ax # imm = 0xB83E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1079,7 +1079,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask1(float* %p, <16 x float> % ; CHECK-LABEL: test_masked_float_to_16_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $26137, %ax # imm = 0x6619 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1093,7 +1093,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask1(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $26137, %ax # imm = 0x6619 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1106,7 +1106,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> % ; CHECK-LABEL: test_masked_float_to_16_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-11480, %ax # imm = 0xD328 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1120,7 +1120,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-11480, %ax # imm = 0xD328 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1133,7 +1133,7 @@ define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> % ; CHECK-LABEL: test_masked_float_to_16_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-21749, %ax # imm = 0xAB0B -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} ; CHECK-NEXT: retq %s = load float, float* %p @@ -1147,7 +1147,7 @@ define <16 x float> @test_masked_z_float_to_16_mem_mask3(float* %p) { ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-21749, %ax # imm = 0xAB0B -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} ; CHECK-NEXT: retq %s = load float, float* %p diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll index 920aba6e623..b466a2f9ebc 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-scalar-int.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s define <16 x i8> @test_i8_to_16(i8 %s) { ; CHECK-LABEL: test_i8_to_16: diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll index dd5adb38431..f6229b1f8c6 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq %s -o - | FileCheck %s define <8 x float> @test_2xfloat_to_8xfloat(<8 x float> %vec) { ; CHECK-LABEL: test_2xfloat_to_8xfloat: @@ -13,7 +13,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -26,7 +26,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask0(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -37,7 +37,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -50,7 +50,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask1(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -61,7 +61,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -74,7 +74,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask2(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -85,7 +85,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -98,7 +98,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask3(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -117,7 +117,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <1 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $21312, %ax # imm = 0x5340 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -130,7 +130,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask0(<16 x float> %vec) ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $21312, %ax # imm = 0x5340 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -141,7 +141,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <1 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -154,7 +154,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask1(<16 x float> %vec) ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -165,7 +165,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <1 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -178,7 +178,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask2(<16 x float> %vec) ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -189,7 +189,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <1 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -202,7 +202,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask3(<16 x float> %vec) ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -222,7 +222,7 @@ define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %v ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -235,7 +235,7 @@ define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask0(<2 x double>* ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -247,7 +247,7 @@ define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %v ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -260,7 +260,7 @@ define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask1(<2 x double>* ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -272,7 +272,7 @@ define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %v ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -285,7 +285,7 @@ define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask2(<2 x double>* ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -297,7 +297,7 @@ define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %v ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -310,7 +310,7 @@ define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask3(<2 x double>* ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -331,7 +331,7 @@ define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %v ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $21, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -344,7 +344,7 @@ define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask0(<2 x double>* ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $21, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -356,7 +356,7 @@ define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %v ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $82, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -369,7 +369,7 @@ define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask1(<2 x double>* ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $82, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -381,7 +381,7 @@ define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %v ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -394,7 +394,7 @@ define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask2(<2 x double>* ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -406,7 +406,7 @@ define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %v ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-19, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -419,7 +419,7 @@ define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask3(<2 x double>* ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-19, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -440,7 +440,7 @@ define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %v ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $28, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -453,7 +453,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask0(<4 x double>* ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $28, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -465,7 +465,7 @@ define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %v ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-115, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -478,7 +478,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask1(<4 x double>* ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-115, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -490,7 +490,7 @@ define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %v ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -503,7 +503,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask2(<4 x double>* ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -515,7 +515,7 @@ define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %v ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -528,7 +528,7 @@ define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask3(<4 x double>* ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -551,7 +551,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, < ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: movb $-49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -565,7 +565,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp) ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movb $-49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -578,7 +578,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, < ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: movb $-118, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -592,7 +592,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp) ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movb $-118, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -605,7 +605,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, < ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: movb $-11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -619,7 +619,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp) ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movb $-11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -632,7 +632,7 @@ define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, < ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: movb $-102, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -646,7 +646,7 @@ define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp) ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movb $-102, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -669,7 +669,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -683,7 +683,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %v ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -696,7 +696,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -710,7 +710,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %v ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -723,7 +723,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -737,7 +737,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %v ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -750,7 +750,7 @@ define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: movw $25225, %ax # imm = 0x6289 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -764,7 +764,7 @@ define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %v ; CHECK: # BB#0: ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movw $25225, %ax # imm = 0x6289 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp @@ -785,7 +785,7 @@ define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, < ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-109, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -798,7 +798,7 @@ define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp) ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-109, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -810,7 +810,7 @@ define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, < ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $74, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -823,7 +823,7 @@ define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp) ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $74, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -835,7 +835,7 @@ define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, < ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -848,7 +848,7 @@ define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp) ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -860,7 +860,7 @@ define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, < ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $48, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -873,7 +873,7 @@ define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp) ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $48, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -894,7 +894,7 @@ define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-25378, %ax # imm = 0x9CDE -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -907,7 +907,7 @@ define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %v ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-25378, %ax # imm = 0x9CDE -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -919,7 +919,7 @@ define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-22502, %ax # imm = 0xA81A -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -932,7 +932,7 @@ define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %v ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-22502, %ax # imm = 0xA81A -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -944,7 +944,7 @@ define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -957,7 +957,7 @@ define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %v ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -969,7 +969,7 @@ define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -982,7 +982,7 @@ define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %v ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -1003,7 +1003,7 @@ define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1016,7 +1016,7 @@ define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %v ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1028,7 +1028,7 @@ define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1041,7 +1041,7 @@ define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %v ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1053,7 +1053,7 @@ define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1066,7 +1066,7 @@ define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %v ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1078,7 +1078,7 @@ define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1091,7 +1091,7 @@ define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %v ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll index bc911050d78..d444ac96efc 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-int.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq %s -o - | FileCheck %s ; FIXME: fixing PR34394 should fix the i32x2 memory cases resulting in a simple vbroadcasti32x2 instruction. @@ -15,7 +15,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mask0(<4 x i32> %vec, <4 x i32> %de ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -28,7 +28,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask0(<4 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -39,7 +39,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mask1(<4 x i32> %vec, <4 x i32> %de ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -52,7 +52,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask1(<4 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -63,7 +63,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mask2(<4 x i32> %vec, <4 x i32> %de ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -76,7 +76,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask2(<4 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -87,7 +87,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mask3(<4 x i32> %vec, <4 x i32> %de ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm1 {%k1} ; CHECK-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -100,7 +100,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask3(<4 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -119,7 +119,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mask0(<8 x i32> %vec, <8 x i32> %de ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $92, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -132,7 +132,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask0(<8 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $92, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -143,7 +143,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mask1(<8 x i32> %vec, <8 x i32> %de ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-15, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -156,7 +156,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask1(<8 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-15, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -167,7 +167,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mask2(<8 x i32> %vec, <8 x i32> %de ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-95, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -180,7 +180,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask2(<8 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-95, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -191,7 +191,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mask3(<8 x i32> %vec, <8 x i32> %de ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -204,7 +204,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask3(<8 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -223,7 +223,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mask0(<16 x i32> %vec, <16 x i32> ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-18638, %ax # imm = 0xB732 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -236,7 +236,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask0(<16 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-18638, %ax # imm = 0xB732 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -247,7 +247,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mask1(<16 x i32> %vec, <16 x i32> ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $25429, %ax # imm = 0x6355 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -260,7 +260,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask1(<16 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $25429, %ax # imm = 0x6355 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -271,7 +271,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mask2(<16 x i32> %vec, <16 x i32> ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $27159, %ax # imm = 0x6A17 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -284,7 +284,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask2(<16 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $27159, %ax # imm = 0x6A17 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -295,7 +295,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i32> ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-22884, %ax # imm = 0xA69C -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -308,7 +308,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask3(<16 x i32> %vec) { ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-22884, %ax # imm = 0xA69C -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -330,7 +330,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask0(<2 x i32>* %vp, <4 x i32> ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm1[0,2,0,2] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -344,7 +344,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask0(<2 x i32>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[0,2,0,2] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -357,7 +357,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask1(<2 x i32>* %vp, <4 x i32> ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm1[0,2,0,2] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -371,7 +371,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask1(<2 x i32>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[0,2,0,2] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -384,7 +384,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask2(<2 x i32>* %vp, <4 x i32> ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm1[0,2,0,2] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -398,7 +398,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask2(<2 x i32>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[0,2,0,2] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -411,7 +411,7 @@ define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask3(<2 x i32>* %vp, <4 x i32> ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = xmm1[0,2,0,2] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -425,7 +425,7 @@ define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask3(<2 x i32>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[0,2,0,2] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -450,7 +450,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask0(<2 x i32>* %vp, <8 x i32> ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-NEXT: movb $-94, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -465,7 +465,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask0(<2 x i32>* %vp) { ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-NEXT: movb $-94, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -479,7 +479,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask1(<2 x i32>* %vp, <8 x i32> ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-NEXT: movb $97, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -494,7 +494,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask1(<2 x i32>* %vp) { ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-NEXT: movb $97, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -508,7 +508,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask2(<2 x i32>* %vp, <8 x i32> ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-NEXT: movb $-33, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -523,7 +523,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask2(<2 x i32>* %vp) { ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-NEXT: movb $-33, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -537,7 +537,7 @@ define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask3(<2 x i32>* %vp, <8 x i32> ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; CHECK-NEXT: movb $-111, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -552,7 +552,7 @@ define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask3(<2 x i32>* %vp) { ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-NEXT: movb $-111, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -577,7 +577,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp, <16 x i ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2] ; CHECK-NEXT: movw $27158, %ax # imm = 0x6A16 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -592,7 +592,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(<2 x i32>* %vp) { ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2] ; CHECK-NEXT: movw $27158, %ax # imm = 0x6A16 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -606,7 +606,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp, <16 x i ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2] ; CHECK-NEXT: movw $26363, %ax # imm = 0x66FB -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -621,7 +621,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(<2 x i32>* %vp) { ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2] ; CHECK-NEXT: movw $26363, %ax # imm = 0x66FB -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -635,7 +635,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp, <16 x i ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2] ; CHECK-NEXT: movw $-19542, %ax # imm = 0xB3AA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -650,7 +650,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(<2 x i32>* %vp) { ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2] ; CHECK-NEXT: movw $-19542, %ax # imm = 0xB3AA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -664,7 +664,7 @@ define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp, <16 x i ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2] ; CHECK-NEXT: movw $27409, %ax # imm = 0x6B11 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermd %zmm1, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -679,7 +679,7 @@ define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(<2 x i32>* %vp) { ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; CHECK-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2] ; CHECK-NEXT: movw $27409, %ax # imm = 0x6B11 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <2 x i32>, <2 x i32>* %vp @@ -700,7 +700,7 @@ define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask0(<4 x i32>* %vp, <8 x i32> ; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-87, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -713,7 +713,7 @@ define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask0(<4 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-87, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -725,7 +725,7 @@ define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask1(<4 x i32>* %vp, <8 x i32> ; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -738,7 +738,7 @@ define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask1(<4 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -750,7 +750,7 @@ define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask2(<4 x i32>* %vp, <8 x i32> ; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $114, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -763,7 +763,7 @@ define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask2(<4 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $114, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -775,7 +775,7 @@ define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask3(<4 x i32>* %vp, <8 x i32> ; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $66, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -788,7 +788,7 @@ define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask3(<4 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $66, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -809,7 +809,7 @@ define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask0(<4 x i32>* %vp, <16 x i ; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $10334, %ax # imm = 0x285E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -822,7 +822,7 @@ define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask0(<4 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $10334, %ax # imm = 0x285E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -834,7 +834,7 @@ define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask1(<4 x i32>* %vp, <16 x i ; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30962, %ax # imm = 0x870E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -847,7 +847,7 @@ define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask1(<4 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30962, %ax # imm = 0x870E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -859,7 +859,7 @@ define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask2(<4 x i32>* %vp, <16 x i ; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $31933, %ax # imm = 0x7CBD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -872,7 +872,7 @@ define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask2(<4 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $31933, %ax # imm = 0x7CBD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -884,7 +884,7 @@ define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask3(<4 x i32>* %vp, <16 x i ; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28744, %ax # imm = 0x8FB8 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -897,7 +897,7 @@ define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask3(<4 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28744, %ax # imm = 0x8FB8 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i32>, <4 x i32>* %vp @@ -918,7 +918,7 @@ define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask0(<2 x i64>* %vp, <4 x i64> ; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -931,7 +931,7 @@ define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask0(<2 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -943,7 +943,7 @@ define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask1(<2 x i64>* %vp, <4 x i64> ; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -956,7 +956,7 @@ define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask1(<2 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -968,7 +968,7 @@ define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask2(<2 x i64>* %vp, <4 x i64> ; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -981,7 +981,7 @@ define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask2(<2 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -993,7 +993,7 @@ define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask3(<2 x i64>* %vp, <4 x i64> ; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1006,7 +1006,7 @@ define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask3(<2 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1027,7 +1027,7 @@ define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask0(<2 x i64>* %vp, <8 x i64> ; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $119, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1040,7 +1040,7 @@ define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask0(<2 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $119, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1052,7 +1052,7 @@ define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask1(<2 x i64>* %vp, <8 x i64> ; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-50, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1065,7 +1065,7 @@ define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask1(<2 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-50, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1077,7 +1077,7 @@ define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask2(<2 x i64>* %vp, <8 x i64> ; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-33, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1090,7 +1090,7 @@ define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask2(<2 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-33, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1102,7 +1102,7 @@ define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask3(<2 x i64>* %vp, <8 x i64> ; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1115,7 +1115,7 @@ define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask3(<2 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x i64>, <2 x i64>* %vp @@ -1136,7 +1136,7 @@ define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask0(<8 x i32>* %vp, <16 x i ; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $12321, %ax # imm = 0x3021 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp @@ -1149,7 +1149,7 @@ define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask0(<8 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $12321, %ax # imm = 0x3021 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp @@ -1161,7 +1161,7 @@ define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask1(<8 x i32>* %vp, <16 x i ; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-39, %ax -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp @@ -1174,7 +1174,7 @@ define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask1(<8 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-39, %ax -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp @@ -1186,7 +1186,7 @@ define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask2(<8 x i32>* %vp, <16 x i ; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-24047, %ax # imm = 0xA211 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp @@ -1199,7 +1199,7 @@ define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask2(<8 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-24047, %ax # imm = 0xA211 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp @@ -1211,7 +1211,7 @@ define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask3(<8 x i32>* %vp, <16 x i ; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $5470, %ax # imm = 0x155E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp @@ -1224,7 +1224,7 @@ define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask3(<8 x i32>* %vp) { ; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $5470, %ax # imm = 0x155E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %vp @@ -1245,7 +1245,7 @@ define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask0(<4 x i64>* %vp, <8 x i64> ; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-71, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -1258,7 +1258,7 @@ define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask0(<4 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-71, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -1270,7 +1270,7 @@ define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask1(<4 x i64>* %vp, <8 x i64> ; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -1283,7 +1283,7 @@ define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask1(<4 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -1295,7 +1295,7 @@ define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask2(<4 x i64>* %vp, <8 x i64> ; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $103, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -1308,7 +1308,7 @@ define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask2(<4 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $103, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -1320,7 +1320,7 @@ define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask3(<4 x i64>* %vp, <8 x i64> ; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-83, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp @@ -1333,7 +1333,7 @@ define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask3(<4 x i64>* %vp) { ; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-83, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x i64>, <4 x i64>* %vp diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-high.ll b/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-high.ll index dce20fe92f0..91a9c1ba1a3 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-high.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-high.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s define <4 x float> @test_4xfloat_dup_high(<4 x float> %vec) { ; CHECK-LABEL: test_4xfloat_dup_high: @@ -13,7 +13,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -26,7 +26,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask0(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> @@ -37,7 +37,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -50,7 +50,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask1(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> @@ -61,7 +61,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -74,7 +74,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask2(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> @@ -85,7 +85,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -98,7 +98,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask3(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> @@ -109,7 +109,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -122,7 +122,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mask4(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3> @@ -142,7 +142,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -155,7 +155,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask0(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -167,7 +167,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -180,7 +180,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask1(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -192,7 +192,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -205,7 +205,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask2(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -217,7 +217,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -230,7 +230,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask3(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -242,7 +242,7 @@ define <4 x float> @test_masked_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -255,7 +255,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask4(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -275,7 +275,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-106, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -288,7 +288,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask0(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-106, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> @@ -299,7 +299,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $114, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -312,7 +312,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask1(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $114, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> @@ -323,7 +323,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-104, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -336,7 +336,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask2(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-104, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> @@ -347,7 +347,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -360,7 +360,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask3(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> @@ -371,7 +371,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-109, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -384,7 +384,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mask4(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-109, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7> @@ -404,7 +404,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $74, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -417,7 +417,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask0(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $74, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -429,7 +429,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -442,7 +442,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask1(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -454,7 +454,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $48, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -467,7 +467,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask2(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $48, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -479,7 +479,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-100, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -492,7 +492,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask3(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-100, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -504,7 +504,7 @@ define <8 x float> @test_masked_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-89, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -517,7 +517,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask4(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-89, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -537,7 +537,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask0(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -550,7 +550,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask0(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15> @@ -561,7 +561,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask1(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -574,7 +574,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask1(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15> @@ -585,7 +585,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask2(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -598,7 +598,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask2(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15> @@ -609,7 +609,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask3(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -622,7 +622,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask3(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15> @@ -633,7 +633,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mask4(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -646,7 +646,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mask4(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15> @@ -666,7 +666,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -679,7 +679,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask0(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -691,7 +691,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6675, %ax # imm = 0xE5ED -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -704,7 +704,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask1(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6675, %ax # imm = 0xE5ED -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -716,7 +716,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5042, %ax # imm = 0xEC4E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -729,7 +729,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask2(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5042, %ax # imm = 0xEC4E -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -741,7 +741,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30108, %ax # imm = 0x8A64 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -754,7 +754,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask3(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30108, %ax # imm = 0x8A64 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -766,7 +766,7 @@ define <16 x float> @test_masked_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movw $25644, %ax # imm = 0x642C -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -779,7 +779,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask4(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movw $25644, %ax # imm = 0x642C -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-low.ll b/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-low.ll index 5f4e3236657..df3e7af2856 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-low.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/duplicate-low.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s define <2 x double> @test_2xdouble_dup_low(<2 x double> %vec) { ; CHECK-LABEL: test_2xdouble_dup_low: @@ -13,7 +13,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x ; CHECK-LABEL: test_masked_2xdouble_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -26,7 +26,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mask0(<2 x double> %vec) { ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> @@ -37,7 +37,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x ; CHECK-LABEL: test_masked_2xdouble_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -50,7 +50,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mask1(<2 x double> %vec) { ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> @@ -70,7 +70,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mem_mask0(<2 x double>* %vp, < ; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -83,7 +83,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask0(<2 x double>* %vp) ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -95,7 +95,7 @@ define <2 x double> @test_masked_2xdouble_dup_low_mem_mask1(<2 x double>* %vp, < ; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -108,7 +108,7 @@ define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask1(<2 x double>* %vp) ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -128,7 +128,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -141,7 +141,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask0(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -152,7 +152,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -165,7 +165,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask1(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -176,7 +176,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -189,7 +189,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask2(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -200,7 +200,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -213,7 +213,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask3(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -224,7 +224,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -237,7 +237,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mask4(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -257,7 +257,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask0(<4 x double>* %vp, < ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -270,7 +270,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask0(<4 x double>* %vp) ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -282,7 +282,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask1(<4 x double>* %vp, < ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -295,7 +295,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask1(<4 x double>* %vp) ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -307,7 +307,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask2(<4 x double>* %vp, < ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -320,7 +320,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask2(<4 x double>* %vp) ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -332,7 +332,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask3(<4 x double>* %vp, < ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -345,7 +345,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask3(<4 x double>* %vp) ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -357,7 +357,7 @@ define <4 x double> @test_masked_4xdouble_dup_low_mem_mask4(<4 x double>* %vp, < ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -370,7 +370,7 @@ define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask4(<4 x double>* %vp) ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -390,7 +390,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -403,7 +403,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask0(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -414,7 +414,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -427,7 +427,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask1(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -438,7 +438,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-24, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -451,7 +451,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask2(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-24, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -462,7 +462,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -475,7 +475,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask3(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -486,7 +486,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-50, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -499,7 +499,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mask4(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-50, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -519,7 +519,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask0(<8 x double>* %vp, < ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -532,7 +532,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask0(<8 x double>* %vp) ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -544,7 +544,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask1(<8 x double>* %vp, < ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $79, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -557,7 +557,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask1(<8 x double>* %vp) ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $79, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -569,7 +569,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask2(<8 x double>* %vp, < ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-70, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -582,7 +582,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask2(<8 x double>* %vp) ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-70, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -594,7 +594,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask3(<8 x double>* %vp, < ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-27, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -607,7 +607,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask3(<8 x double>* %vp) ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-27, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -619,7 +619,7 @@ define <8 x double> @test_masked_8xdouble_dup_low_mem_mask4(<8 x double>* %vp, < ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-82, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -632,7 +632,7 @@ define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask4(<8 x double>* %vp) ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-82, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -652,7 +652,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x flo ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -665,7 +665,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask0(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -676,7 +676,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x flo ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -689,7 +689,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask1(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -700,7 +700,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x flo ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -713,7 +713,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask2(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -724,7 +724,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x flo ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -737,7 +737,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask3(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -748,7 +748,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x flo ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -761,7 +761,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mask4(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2> @@ -781,7 +781,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask0(<4 x float>* %vp, <4 x ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -794,7 +794,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask0(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -806,7 +806,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask1(<4 x float>* %vp, <4 x ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -819,7 +819,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask1(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -831,7 +831,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask2(<4 x float>* %vp, <4 x ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -844,7 +844,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask2(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -856,7 +856,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask3(<4 x float>* %vp, <4 x ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -869,7 +869,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask3(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -881,7 +881,7 @@ define <4 x float> @test_masked_4xfloat_dup_low_mem_mask4(<4 x float>* %vp, <4 x ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -894,7 +894,7 @@ define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask4(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -914,7 +914,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x flo ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -927,7 +927,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask0(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -938,7 +938,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x flo ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -951,7 +951,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask1(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -962,7 +962,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x flo ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-73, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -975,7 +975,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask2(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-73, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -986,7 +986,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x flo ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $102, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -999,7 +999,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask3(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $102, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -1010,7 +1010,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x flo ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-46, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1023,7 +1023,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mask4(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-46, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6> @@ -1043,7 +1043,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask0(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-86, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1056,7 +1056,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask0(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-86, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1068,7 +1068,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask1(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1081,7 +1081,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask1(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1093,7 +1093,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask2(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1106,7 +1106,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask2(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1118,7 +1118,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask3(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1131,7 +1131,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask3(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1143,7 +1143,7 @@ define <8 x float> @test_masked_8xfloat_dup_low_mem_mask4(<8 x float>* %vp, <8 x ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1156,7 +1156,7 @@ define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask4(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -1176,7 +1176,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $21312, %ax # imm = 0x5340 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1189,7 +1189,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask0(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $21312, %ax # imm = 0x5340 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14> @@ -1200,7 +1200,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1213,7 +1213,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask1(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14> @@ -1224,7 +1224,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1237,7 +1237,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask2(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14> @@ -1248,7 +1248,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1261,7 +1261,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask3(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14> @@ -1272,7 +1272,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movw $15638, %ax # imm = 0x3D16 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1285,7 +1285,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mask4(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movw $15638, %ax # imm = 0x3D16 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14> @@ -1305,7 +1305,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask0(<16 x float>* %vp, < ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-2129, %ax # imm = 0xF7AF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1318,7 +1318,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask0(<16 x float>* %vp) ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-2129, %ax # imm = 0xF7AF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1330,7 +1330,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask1(<16 x float>* %vp, < ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12900, %ax # imm = 0xCD9C -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1343,7 +1343,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask1(<16 x float>* %vp) ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12900, %ax # imm = 0xCD9C -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1355,7 +1355,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask2(<16 x float>* %vp, < ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $29358, %ax # imm = 0x72AE -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1368,7 +1368,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask2(<16 x float>* %vp) ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $29358, %ax # imm = 0x72AE -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1380,7 +1380,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask3(<16 x float>* %vp, < ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $5272, %ax # imm = 0x1498 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1393,7 +1393,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask3(<16 x float>* %vp) ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $5272, %ax # imm = 0x1498 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1405,7 +1405,7 @@ define <16 x float> @test_masked_16xfloat_dup_low_mem_mask4(<16 x float>* %vp, < ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movw $20975, %ax # imm = 0x51EF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1418,7 +1418,7 @@ define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask4(<16 x float>* %vp) ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movw $20975, %ax # imm = 0x51EF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll index 05b3df02acf..d58c12d10d8 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/in_lane_permute.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s ; FIXME: The non immediate <16 x float> test cases should be fixed by PR34382 @@ -15,7 +15,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> ; CHECK-LABEL: test_masked_4xfloat_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[2,1,3,1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -28,7 +28,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask0(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,3,1] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 1> @@ -39,7 +39,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> ; CHECK-LABEL: test_masked_4xfloat_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -52,7 +52,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask1(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2> @@ -63,7 +63,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> ; CHECK-LABEL: test_masked_4xfloat_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,3,2,1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -76,7 +76,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask2(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3,2,1] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 1> @@ -95,7 +95,7 @@ define <4 x float> @test_masked_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> ; CHECK-LABEL: test_masked_4xfloat_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -108,7 +108,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mask3(<4 x float> %vec) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2> @@ -128,7 +128,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,3,1,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -141,7 +141,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask0(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,3,1,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -154,7 +154,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[1,3,2,0] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -167,7 +167,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask1(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[1,3,2,0] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -180,7 +180,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[2,1,3,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -193,7 +193,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask2(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -215,7 +215,7 @@ define <4 x float> @test_masked_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x fl ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[0,1,3,0] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -228,7 +228,7 @@ define <4 x float> @test_masked_z_4xfloat_perm_mem_mask3(<4 x float>* %vp) { ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,0] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp @@ -249,7 +249,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> ; CHECK-LABEL: test_masked_8xfloat_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $83, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,6,6,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -262,7 +262,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $83, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,6,6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 6, i32 6> @@ -273,7 +273,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-34, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,7,6,7,6] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -286,7 +286,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask1(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-34, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,7,6,7,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 7, i32 6, i32 7, i32 6> @@ -297,7 +297,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> ; CHECK-LABEL: test_masked_8xfloat_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,2,1,6,5,4,4] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -310,7 +310,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,2,1,6,5,4,4] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 2, i32 1, i32 6, i32 5, i32 4, i32 4> @@ -329,7 +329,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-111, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,2,1,0,6,6,5,4] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -342,7 +342,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask3(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-111, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,1,0,6,6,5,4] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 2, i32 1, i32 0, i32 6, i32 6, i32 5, i32 4> @@ -353,7 +353,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> ; CHECK-LABEL: test_masked_8xfloat_perm_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $61, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,3,7,7,6,5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -366,7 +366,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask4(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask4: ; CHECK: # BB#0: ; CHECK-NEXT: movb $61, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,3,7,7,6,5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 6, i32 5> @@ -377,7 +377,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3,6,5,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -390,7 +390,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask5(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3,6,5,7,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 3, i32 6, i32 5, i32 7, i32 7> @@ -409,7 +409,7 @@ define <8 x float> @test_masked_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> ; CHECK-LABEL: test_masked_8xfloat_perm_mask6: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-51, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,5,6,7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -422,7 +422,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mask6(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask6: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-51, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,5,6,7,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 5, i32 6, i32 7, i32 7> @@ -433,7 +433,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x fl ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $114, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,0,2,1,7,4,6,5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -446,7 +446,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mask7(<8 x float> %vec) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $114, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,2,1,7,4,6,5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 0, i32 2, i32 1, i32 7, i32 4, i32 6, i32 5> @@ -468,7 +468,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x fl ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps (%rdi), %ymm1 ; CHECK-NEXT: movb $-95, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm1[3,0,0,2,4,6,7,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -482,7 +482,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps (%rdi), %ymm0 ; CHECK-NEXT: movb $-95, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,2,4,6,7,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -495,7 +495,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-41, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,2,2,6,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -508,7 +508,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-41, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,2,2,6,4,6,6] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -522,7 +522,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x fl ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps (%rdi), %ymm1 ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm1[2,1,1,3,4,4,7,4] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -536,7 +536,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps (%rdi), %ymm0 ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,1,3,4,4,7,4] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -558,7 +558,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-70, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,0,3,3,4,4,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -571,7 +571,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-70, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,0,3,3,4,4,7,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -585,7 +585,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask4(<8 x float>* %vp, <8 x fl ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps (%rdi), %ymm1 ; CHECK-NEXT: movb $30, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm1[0,1,0,1,4,6,5,4] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -599,7 +599,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask4(<8 x float>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps (%rdi), %ymm0 ; CHECK-NEXT: movb $30, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,0,1,4,6,5,4] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -612,7 +612,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $56, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,0,3,6,4,4,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -625,7 +625,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movb $56, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,0,3,6,4,4,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -649,7 +649,7 @@ define <8 x float> @test_masked_8xfloat_perm_mem_mask6(<8 x float>* %vp, <8 x fl ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps (%rdi), %ymm1 ; CHECK-NEXT: movb $-54, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm1[0,1,2,3,7,4,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -663,7 +663,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_mem_mask6(<8 x float>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps (%rdi), %ymm0 ; CHECK-NEXT: movb $-54, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -676,7 +676,7 @@ define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $85, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,2,3,1,4,6,7,5] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -689,7 +689,7 @@ define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp) { ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movb $85, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,1,4,6,7,5] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp @@ -712,7 +712,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x fl ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15] ; CHECK-NEXT: movw $16429, %ax # imm = 0x402D -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -726,7 +726,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15] ; CHECK-NEXT: movw $16429, %ax # imm = 0x402D -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 1, i32 6, i32 4, i32 6, i32 5, i32 8, i32 9, i32 8, i32 11, i32 13, i32 13, i32 13, i32 15> @@ -737,7 +737,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26425, %ax # imm = 0x98C7 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -750,7 +750,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask1(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26425, %ax # imm = 0x98C7 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 1, i32 6, i32 6, i32 6, i32 5, i32 10, i32 10, i32 10, i32 9, i32 14, i32 14, i32 14, i32 13> @@ -762,7 +762,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x fl ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12] ; CHECK-NEXT: movw $28987, %ax # imm = 0x713B -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -776,7 +776,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12] ; CHECK-NEXT: movw $28987, %ax # imm = 0x713B -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 0, i32 0, i32 5, i32 4, i32 6, i32 5, i32 11, i32 10, i32 9, i32 9, i32 14, i32 13, i32 14, i32 12> @@ -795,7 +795,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $11457, %ax # imm = 0x2CC1 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -808,7 +808,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask3(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $11457, %ax # imm = 0x2CC1 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 0, i32 2, i32 5, i32 5, i32 4, i32 6, i32 9, i32 9, i32 8, i32 10, i32 13, i32 13, i32 12, i32 14> @@ -820,7 +820,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x fl ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15] ; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -834,7 +834,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask4(<16 x float> %vec) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15] ; CHECK-NEXT: movw $30908, %ax # imm = 0x78BC -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 3, i32 3, i32 5, i32 5, i32 5, i32 7, i32 11, i32 11, i32 8, i32 11, i32 14, i32 12, i32 14, i32 15> @@ -845,7 +845,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movw $26863, %ax # imm = 0x68EF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -858,7 +858,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask5(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movw $26863, %ax # imm = 0x68EF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 1, i32 0, i32 5, i32 6, i32 5, i32 4, i32 9, i32 10, i32 9, i32 8, i32 13, i32 14, i32 13, i32 12> @@ -879,7 +879,7 @@ define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x fl ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13] ; CHECK-NEXT: movw $-28239, %ax # imm = 0x91B1 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm1 {%k1} ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -893,7 +893,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mask6(<16 x float> %vec) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13] ; CHECK-NEXT: movw $-28239, %ax # imm = 0x91B1 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 2, i32 4, i32 4, i32 6, i32 7, i32 9, i32 11, i32 8, i32 11, i32 13, i32 12, i32 13, i32 13> @@ -904,7 +904,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-32205, %ax # imm = 0x8233 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -917,7 +917,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mask7(<16 x float> %vec) { ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-32205, %ax # imm = 0x8233 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 0, i32 2, i32 7, i32 7, i32 4, i32 6, i32 11, i32 11, i32 8, i32 10, i32 15, i32 15, i32 12, i32 14> @@ -939,7 +939,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12] ; CHECK-NEXT: movw $-22887, %ax # imm = 0xA699 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -953,7 +953,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12] ; CHECK-NEXT: movw $-22887, %ax # imm = 0xA699 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -966,7 +966,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $22744, %ax # imm = 0x58D8 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -979,7 +979,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $22744, %ax # imm = 0x58D8 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -993,7 +993,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13] ; CHECK-NEXT: movw $-8399, %ax # imm = 0xDF31 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1007,7 +1007,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13] ; CHECK-NEXT: movw $-8399, %ax # imm = 0xDF31 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1029,7 +1029,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $18246, %ax # imm = 0x4746 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1042,7 +1042,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $18246, %ax # imm = 0x4746 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1056,7 +1056,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask4(<16 x float>* %vp, <16 ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12] ; CHECK-NEXT: movw $1218, %ax # imm = 0x4C2 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1070,7 +1070,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask4(<16 x float>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12] ; CHECK-NEXT: movw $1218, %ax # imm = 0x4C2 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1083,7 +1083,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movw $2665, %ax # imm = 0xA69 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1096,7 +1096,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask5: ; CHECK: # BB#0: ; CHECK-NEXT: movw $2665, %ax # imm = 0xA69 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1120,7 +1120,7 @@ define <16 x float> @test_masked_16xfloat_perm_mem_mask6(<16 x float>* %vp, <16 ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15] ; CHECK-NEXT: movw $-20907, %ax # imm = 0xAE55 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1134,7 +1134,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_mem_mask6(<16 x float>* %vp) { ; CHECK: # BB#0: ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15] ; CHECK-NEXT: movw $-20907, %ax # imm = 0xAE55 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1147,7 +1147,7 @@ define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28944, %ax # imm = 0x8EF0 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1160,7 +1160,7 @@ define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask7: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28944, %ax # imm = 0x8EF0 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13] ; CHECK-NEXT: retq %vec = load <16 x float>, <16 x float>* %vp @@ -1181,7 +1181,7 @@ define <2 x double> @test_masked_2xdouble_perm_mask0(<2 x double> %vec, <2 x dou ; CHECK-LABEL: test_masked_2xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -1194,7 +1194,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mask0(<2 x double> %vec) { ; CHECK-LABEL: test_masked_z_2xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0> @@ -1205,7 +1205,7 @@ define <2 x double> @test_masked_2xdouble_perm_mask1(<2 x double> %vec, <2 x dou ; CHECK-LABEL: test_masked_2xdouble_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -1218,7 +1218,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mask1(<2 x double> %vec) { ; CHECK-LABEL: test_masked_z_2xdouble_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0> @@ -1238,7 +1238,7 @@ define <2 x double> @test_masked_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x ; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -1251,7 +1251,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mem_mask0(<2 x double>* %vp) { ; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -1264,7 +1264,7 @@ define <2 x double> @test_masked_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x ; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -1277,7 +1277,7 @@ define <2 x double> @test_masked_z_2xdouble_perm_mem_mask1(<2 x double>* %vp) { ; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp @@ -1298,7 +1298,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x dou ; CHECK-LABEL: test_masked_4xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,0,2,3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1311,7 +1311,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,2,3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3> @@ -1322,7 +1322,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x dou ; CHECK-LABEL: test_masked_4xdouble_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1335,7 +1335,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2> @@ -1346,7 +1346,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x dou ; CHECK-LABEL: test_masked_4xdouble_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[0,1,3,3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1359,7 +1359,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,3,3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3> @@ -1378,7 +1378,7 @@ define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x dou ; CHECK-LABEL: test_masked_4xdouble_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1391,7 +1391,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2> @@ -1411,7 +1411,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -1424,7 +1424,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -1437,7 +1437,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -1450,7 +1450,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,3,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -1463,7 +1463,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -1476,7 +1476,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -1498,7 +1498,7 @@ define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -1511,7 +1511,7 @@ define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp) { ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,2] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp @@ -1532,7 +1532,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x dou ; CHECK-LABEL: test_masked_8xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-107, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,3,2,4,5,7,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1545,7 +1545,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-107, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,3,2,4,5,7,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 2, i32 4, i32 5, i32 7, i32 6> @@ -1556,7 +1556,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask1(<8 x double> %vec, <8 x dou ; CHECK-LABEL: test_masked_8xdouble_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-39, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,7,6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1569,7 +1569,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask1(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-39, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,7,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 6> @@ -1580,7 +1580,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x dou ; CHECK-LABEL: test_masked_8xdouble_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-53, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,3,5,5,6,7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1593,7 +1593,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-53, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,3,5,5,6,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 5, i32 5, i32 6, i32 7> @@ -1612,7 +1612,7 @@ define <8 x double> @test_masked_8xdouble_perm_mask3(<8 x double> %vec, <8 x dou ; CHECK-LABEL: test_masked_8xdouble_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-89, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,2,4,4,6,7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1625,7 +1625,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mask3(<8 x double> %vec) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-89, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 7> @@ -1645,7 +1645,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-95, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,5,4,7,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -1658,7 +1658,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-95, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,5,4,7,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -1671,7 +1671,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $27, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,3,3,4,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -1684,7 +1684,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask1(<8 x double>* %vp) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $27, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,3,3,4,5,7,7] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -1697,7 +1697,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,4,7,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -1710,7 +1710,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,4,7,6] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -1732,7 +1732,7 @@ define <8 x double> @test_masked_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $89, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,0,3,2,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp @@ -1745,7 +1745,7 @@ define <8 x double> @test_masked_z_8xdouble_perm_mem_mask3(<8 x double>* %vp) { ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $89, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,2,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x double>, <8 x double>* %vp diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index 4966a6c27ff..92de1cc4730 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s ; FIXME: All cases here should be fixed by PR34380 @@ -4146,10 +4146,10 @@ define <2 x double> @test_masked_8xdouble_to_2xdouble_perm_mask1(<8 x double> %v ; CHECK: # BB#0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3] +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1} -; CHECK-NEXT: vmovapd %xmm1, %xmm0 +; CHECK-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <2 x i32> <i32 3, i32 7> @@ -4162,9 +4162,10 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mask1(<8 x double> ; CHECK: # BB#0: ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: kmovd %eax, %k1 -; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z} +; CHECK-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <2 x i32> <i32 3, i32 7> diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/permute.ll index e13390facbd..e81f11b6b1a 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/permute.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) { ; CHECK-LABEL: test_16xi16_perm_mask0: diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-interleave.ll b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-interleave.ll index aab0c6827a7..e5b03c2221c 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-interleave.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-interleave.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s define <4 x float> @test_4xfloat_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2) { ; CHECK-LABEL: test_4xfloat_shuff_mask0: @@ -13,7 +13,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask0(<4 x float> %vec1, <4 x floa ; CHECK-LABEL: test_4xfloat_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[2,1],xmm1[3,1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -26,7 +26,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask0(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],xmm1[3,1] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 5> @@ -37,7 +37,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask1(<4 x float> %vec1, <4 x floa ; CHECK-LABEL: test_4xfloat_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,2],xmm1[3,2] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -50,7 +50,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask1(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2],xmm1[3,2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 2, i32 7, i32 6> @@ -61,7 +61,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask2(<4 x float> %vec1, <4 x floa ; CHECK-LABEL: test_4xfloat_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,3],xmm1[2,1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -74,7 +74,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask2(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],xmm1[2,1] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 5> @@ -93,7 +93,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mask3(<4 x float> %vec1, <4 x floa ; CHECK-LABEL: test_4xfloat_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm2 {%k1} = xmm0[3,3],xmm1[3,3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -106,7 +106,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mask3(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],xmm1[3,3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 7, i32 7> @@ -126,7 +126,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,0],mem[1,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -140,7 +140,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask0(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0],mem[1,2] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -153,7 +153,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,3],mem[1,3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -167,7 +167,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask1(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],mem[1,3] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -180,7 +180,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,3],mem[2,0] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -194,7 +194,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask2(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],mem[2,0] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -216,7 +216,7 @@ define <4 x float> @test_4xfloat_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[2,1],mem[3,2] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -230,7 +230,7 @@ define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask3(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],mem[3,2] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -251,7 +251,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x floa ; CHECK-LABEL: test_8xfloat_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -264,7 +264,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 1, i32 3, i32 8, i32 10, i32 5, i32 7, i32 12, i32 14> @@ -275,7 +275,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x floa ; CHECK-LABEL: test_8xfloat_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -288,7 +288,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 3, i32 11, i32 9, i32 4, i32 7, i32 15, i32 13> @@ -299,7 +299,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x floa ; CHECK-LABEL: test_8xfloat_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -312,7 +312,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 2, i32 10, i32 10, i32 4, i32 6, i32 14, i32 14> @@ -331,7 +331,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x floa ; CHECK-LABEL: test_8xfloat_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm2 {%k1} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -344,7 +344,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 2, i32 11, i32 10, i32 7, i32 6, i32 15, i32 14> @@ -364,7 +364,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-106, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -378,7 +378,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-106, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -391,7 +391,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $114, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -405,7 +405,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $114, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -418,7 +418,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-104, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -432,7 +432,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-104, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -454,7 +454,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -468,7 +468,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -489,7 +489,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x ; CHECK-LABEL: test_16xfloat_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-19315, %ax # imm = 0xB48D -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -502,7 +502,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-19315, %ax # imm = 0xB48D -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 2, i32 19, i32 18, i32 7, i32 6, i32 23, i32 22, i32 11, i32 10, i32 27, i32 26, i32 15, i32 14, i32 31, i32 30> @@ -513,7 +513,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x ; CHECK-LABEL: test_16xfloat_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $18064, %ax # imm = 0x4690 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -526,7 +526,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $18064, %ax # imm = 0x4690 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 1, i32 2, i32 19, i32 19, i32 5, i32 6, i32 23, i32 23, i32 9, i32 10, i32 27, i32 27, i32 13, i32 14, i32 31, i32 31> @@ -537,7 +537,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x ; CHECK-LABEL: test_16xfloat_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12346, %ax # imm = 0xCFC6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -550,7 +550,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12346, %ax # imm = 0xCFC6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 18, i32 17, i32 7, i32 4, i32 22, i32 21, i32 11, i32 8, i32 26, i32 25, i32 15, i32 12, i32 30, i32 29> @@ -569,7 +569,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x ; CHECK-LABEL: test_16xfloat_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-9865, %ax # imm = 0xD977 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -582,7 +582,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-9865, %ax # imm = 0xD977 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 3, i32 16, i32 18, i32 6, i32 7, i32 20, i32 22, i32 10, i32 11, i32 24, i32 26, i32 14, i32 15, i32 28, i32 30> @@ -602,7 +602,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $7677, %ax # imm = 0x1DFD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -616,7 +616,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $7677, %ax # imm = 0x1DFD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -629,7 +629,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $14448, %ax # imm = 0x3870 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -643,7 +643,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $14448, %ax # imm = 0x3870 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -656,7 +656,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-13463, %ax # imm = 0xCB69 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -670,7 +670,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-13463, %ax # imm = 0xCB69 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -692,7 +692,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $21793, %ax # imm = 0x5521 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -706,7 +706,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $21793, %ax # imm = 0x5521 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -727,7 +727,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mask0(<2 x double> %vec1, <2 x d ; CHECK-LABEL: test_2xdouble_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -740,7 +740,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mask0(<2 x double> %vec1, < ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> @@ -751,7 +751,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mask1(<2 x double> %vec1, <2 x d ; CHECK-LABEL: test_2xdouble_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -764,7 +764,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mask1(<2 x double> %vec1, < ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2> @@ -784,7 +784,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mem_mask0(<2 x double> %vec1, <2 ; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -798,7 +798,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask0(<2 x double> %vec ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0] ; CHECK-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p @@ -811,7 +811,7 @@ define <2 x double> @test_2xdouble_masked_shuff_mem_mask1(<2 x double> %vec1, <2 ; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -825,7 +825,7 @@ define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask1(<2 x double> %vec ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0] ; CHECK-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p @@ -846,7 +846,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x d ; CHECK-LABEL: test_4xdouble_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -859,7 +859,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, < ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 7> @@ -870,7 +870,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x d ; CHECK-LABEL: test_4xdouble_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -883,7 +883,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, < ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 6> @@ -894,7 +894,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x d ; CHECK-LABEL: test_4xdouble_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[0],ymm0[3],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -907,7 +907,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, < ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[0],ymm0[3],ymm1[2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 4, i32 3, i32 6> @@ -926,7 +926,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x d ; CHECK-LABEL: test_4xdouble_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -939,7 +939,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, < ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 7> @@ -959,7 +959,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -973,7 +973,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[2] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -986,7 +986,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[1],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1000,7 +1000,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[1],ymm0[2],mem[2] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -1013,7 +1013,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[3],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1027,7 +1027,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[3],mem[2] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -1049,7 +1049,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1063,7 +1063,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[2],mem[2] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -1084,7 +1084,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x d ; CHECK-LABEL: test_8xdouble_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-77, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1097,7 +1097,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-77, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 4, i32 13, i32 7, i32 15> @@ -1108,7 +1108,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x d ; CHECK-LABEL: test_8xdouble_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $107, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1121,7 +1121,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $107, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 5, i32 13, i32 6, i32 15> @@ -1132,7 +1132,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x d ; CHECK-LABEL: test_8xdouble_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-87, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1145,7 +1145,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-87, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 13, i32 6, i32 14> @@ -1164,7 +1164,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x d ; CHECK-LABEL: test_8xdouble_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1177,7 +1177,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 12, i32 7, i32 15> @@ -1197,7 +1197,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $72, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1211,7 +1211,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $72, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -1224,7 +1224,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1238,7 +1238,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -1251,7 +1251,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1265,7 +1265,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -1287,7 +1287,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-39, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1301,7 +1301,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-39, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-vec.ll b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-vec.ll index 674d039999f..091a26c624e 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-vec.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-vec.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s ; FIXME: 128-bit shuffles of 256-bit vectors cases should be fixed by PR34359 @@ -16,7 +16,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x floa ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $-41, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -29,7 +29,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $-41, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -41,7 +41,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x floa ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $-63, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -54,7 +54,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $-63, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -66,7 +66,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x floa ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $107, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> @@ -79,7 +79,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $107, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> @@ -99,7 +99,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x floa ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $66, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -112,7 +112,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $66, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -133,7 +133,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $-24, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -147,7 +147,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $-24, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -161,7 +161,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $-6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -175,7 +175,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $-6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -189,7 +189,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $-50, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -203,7 +203,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $-50, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -226,7 +226,7 @@ define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $-26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -240,7 +240,7 @@ define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $-26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -261,7 +261,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x ; CHECK-LABEL: test_16xfloat_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-11480, %ax # imm = 0xD328 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -274,7 +274,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-11480, %ax # imm = 0xD328 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> @@ -285,7 +285,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x ; CHECK-LABEL: test_16xfloat_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-21749, %ax # imm = 0xAB0B -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -298,7 +298,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-21749, %ax # imm = 0xAB0B -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31> @@ -309,7 +309,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x ; CHECK-LABEL: test_16xfloat_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $75, %ax -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -322,7 +322,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $75, %ax -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> @@ -341,7 +341,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x ; CHECK-LABEL: test_16xfloat_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $32347, %ax # imm = 0x7E5B -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -354,7 +354,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $32347, %ax # imm = 0x7E5B -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27> @@ -374,7 +374,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <1 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-19232, %ax # imm = 0xB4E0 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -388,7 +388,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-19232, %ax # imm = 0xB4E0 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -401,7 +401,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <1 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-29660, %ax # imm = 0x8C24 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -415,7 +415,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-29660, %ax # imm = 0x8C24 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -428,7 +428,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <1 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12160, %ax # imm = 0xD080 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -442,7 +442,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12160, %ax # imm = 0xD080 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -464,7 +464,7 @@ define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <1 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30129, %ax # imm = 0x8A4F -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -478,7 +478,7 @@ define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30129, %ax # imm = 0x8A4F -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -500,7 +500,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x d ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> @@ -513,7 +513,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, < ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> @@ -525,7 +525,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x d ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> @@ -538,7 +538,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, < ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> @@ -550,7 +550,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x d ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> @@ -563,7 +563,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, < ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> @@ -583,7 +583,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x d ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> @@ -596,7 +596,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, < ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> @@ -617,7 +617,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -631,7 +631,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -645,7 +645,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -659,7 +659,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -673,7 +673,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -687,7 +687,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -710,7 +710,7 @@ define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -724,7 +724,7 @@ define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec ; CHECK: # BB#0: ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -745,7 +745,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x d ; CHECK-LABEL: test_8xdouble_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -758,7 +758,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9> @@ -769,7 +769,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x d ; CHECK-LABEL: test_8xdouble_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-70, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -782,7 +782,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-70, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13> @@ -793,7 +793,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x d ; CHECK-LABEL: test_8xdouble_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $30, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -806,7 +806,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $30, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9> @@ -825,7 +825,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x d ; CHECK-LABEL: test_8xdouble_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $56, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -838,7 +838,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $56, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11> @@ -858,7 +858,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $95, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -872,7 +872,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $95, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -885,7 +885,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -899,7 +899,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -912,7 +912,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -926,7 +926,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -948,7 +948,7 @@ define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -962,7 +962,7 @@ define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -984,7 +984,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> @@ -997,7 +997,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $26, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> @@ -1009,7 +1009,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $-4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -1022,7 +1022,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $-4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -1034,7 +1034,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $51, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> @@ -1047,7 +1047,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $51, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> @@ -1067,7 +1067,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $92, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -1080,7 +1080,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $92, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11> @@ -1101,7 +1101,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p @@ -1115,7 +1115,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p @@ -1129,7 +1129,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $-104, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p @@ -1143,7 +1143,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $-104, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p @@ -1157,7 +1157,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $113, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p @@ -1171,7 +1171,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $113, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p @@ -1194,7 +1194,7 @@ define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $45, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p @@ -1208,7 +1208,7 @@ define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $45, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <8 x i32>, <8 x i32>* %vec2p @@ -1229,7 +1229,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> % ; CHECK-LABEL: test_16xi32_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $2995, %ax # imm = 0xBB3 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1242,7 +1242,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $2995, %ax # imm = 0xBB3 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31> @@ -1253,7 +1253,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> % ; CHECK-LABEL: test_16xi32_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $18408, %ax # imm = 0x47E8 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1266,7 +1266,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $18408, %ax # imm = 0x47E8 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> @@ -1277,7 +1277,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> % ; CHECK-LABEL: test_16xi32_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $15737, %ax # imm = 0x3D79 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1290,7 +1290,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $15737, %ax # imm = 0x3D79 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19> @@ -1309,7 +1309,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> % ; CHECK-LABEL: test_16xi32_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3073, %ax # imm = 0xF3FF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1322,7 +1322,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-3073, %ax # imm = 0xF3FF -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] ; CHECK-NEXT: retq %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23> @@ -1342,7 +1342,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i3 ; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8166, %ax # imm = 0xE01A -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1356,7 +1356,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-8166, %ax # imm = 0xE01A -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] ; CHECK-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p @@ -1369,7 +1369,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i3 ; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28302, %ax # imm = 0x9172 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1383,7 +1383,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-28302, %ax # imm = 0x9172 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] ; CHECK-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p @@ -1396,7 +1396,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i3 ; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $27158, %ax # imm = 0x6A16 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1410,7 +1410,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $27158, %ax # imm = 0x6A16 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] ; CHECK-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p @@ -1432,7 +1432,7 @@ define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i3 ; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $26363, %ax # imm = 0x66FB -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1446,7 +1446,7 @@ define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $26363, %ax # imm = 0x66FB -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] ; CHECK-NEXT: retq %vec2 = load <16 x i32>, <16 x i32>* %vec2p @@ -1468,7 +1468,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> @@ -1481,7 +1481,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> @@ -1493,7 +1493,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> @@ -1506,7 +1506,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> @@ -1518,7 +1518,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> @@ -1531,7 +1531,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5> @@ -1551,7 +1551,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2 ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> @@ -1564,7 +1564,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7> @@ -1585,7 +1585,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p @@ -1599,7 +1599,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p @@ -1613,7 +1613,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p @@ -1627,7 +1627,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p @@ -1641,7 +1641,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p @@ -1655,7 +1655,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p @@ -1678,7 +1678,7 @@ define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1} ; CHECK-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p @@ -1692,7 +1692,7 @@ define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i ; CHECK: # BB#0: ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %vec2 = load <4 x i64>, <4 x i64>* %vec2p @@ -1713,7 +1713,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2 ; CHECK-LABEL: test_8xi64_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-15, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1726,7 +1726,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-15, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13> @@ -1737,7 +1737,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2 ; CHECK-LABEL: test_8xi64_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-17, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1750,7 +1750,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-17, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13> @@ -1761,7 +1761,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2 ; CHECK-LABEL: test_8xi64_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-24, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1774,7 +1774,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-24, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9> @@ -1793,7 +1793,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2 ; CHECK-LABEL: test_8xi64_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1806,7 +1806,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] ; CHECK-NEXT: retq %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11> @@ -1826,7 +1826,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1840,7 +1840,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] ; CHECK-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p @@ -1853,7 +1853,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1867,7 +1867,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] ; CHECK-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p @@ -1880,7 +1880,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $42, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1894,7 +1894,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $42, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] ; CHECK-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p @@ -1916,7 +1916,7 @@ define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1930,7 +1930,7 @@ define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] ; CHECK-NEXT: retq %vec2 = load <8 x i64>, <8 x i64>* %vec2p diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle.ll b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle.ll index f0194414998..0e6d5c7d5b3 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) { ; CHECK-LABEL: test_16xi8_perm_mask0: diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll b/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll index 14379295857..f11cd20896b 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skx %s -o - | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2) { ; CHECK-LABEL: test_4xfloat_unpack_low_mask0: @@ -13,7 +13,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -26,7 +26,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> @@ -37,7 +37,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -50,7 +50,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> @@ -61,7 +61,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -74,7 +74,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> @@ -93,7 +93,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -106,7 +106,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> @@ -126,7 +126,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -140,7 +140,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %v ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $8, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -153,7 +153,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -167,7 +167,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %v ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -180,7 +180,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -194,7 +194,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %v ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -216,7 +216,7 @@ define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -230,7 +230,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %v ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -251,7 +251,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $122, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -264,7 +264,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $122, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> @@ -275,7 +275,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-107, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -288,7 +288,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-107, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> @@ -299,7 +299,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-25, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -312,7 +312,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-25, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> @@ -331,7 +331,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-127, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -344,7 +344,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-127, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> @@ -364,7 +364,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $72, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -378,7 +378,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %v ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $72, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -391,7 +391,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -405,7 +405,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %v ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -418,7 +418,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -432,7 +432,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %v ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-98, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -454,7 +454,7 @@ define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -468,7 +468,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %v ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $64, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -489,7 +489,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5916, %ax # imm = 0xE8E4 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -502,7 +502,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %ve ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-5916, %ax # imm = 0xE8E4 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> @@ -513,7 +513,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-1130, %ax # imm = 0xFB96 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -526,7 +526,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %ve ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-1130, %ax # imm = 0xFB96 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> @@ -537,7 +537,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12439, %ax # imm = 0xCF69 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -550,7 +550,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %ve ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12439, %ax # imm = 0xCF69 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> @@ -569,7 +569,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, < ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6413, %ax # imm = 0xE6F3 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -582,7 +582,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %ve ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6413, %ax # imm = 0xE6F3 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> @@ -602,7 +602,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $20326, %ax # imm = 0x4F66 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -616,7 +616,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $20326, %ax # imm = 0x4F66 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -629,7 +629,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-17707, %ax # imm = 0xBAD5 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -643,7 +643,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-17707, %ax # imm = 0xBAD5 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -656,7 +656,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6631, %ax # imm = 0xE619 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -670,7 +670,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-6631, %ax # imm = 0xE619 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -692,7 +692,7 @@ define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-20711, %ax # imm = 0xAF19 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -706,7 +706,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-20711, %ax # imm = 0xAF19 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -727,7 +727,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, < ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -740,7 +740,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %ve ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> @@ -751,7 +751,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, < ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -764,7 +764,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %ve ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> @@ -784,7 +784,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -798,7 +798,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] ; CHECK-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p @@ -811,7 +811,7 @@ define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -825,7 +825,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] ; CHECK-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p @@ -846,7 +846,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, < ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -859,7 +859,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %ve ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -870,7 +870,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, < ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -883,7 +883,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %ve ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -894,7 +894,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, < ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -907,7 +907,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %ve ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -926,7 +926,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, < ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -939,7 +939,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %ve ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> @@ -959,7 +959,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -973,7 +973,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -986,7 +986,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1000,7 +1000,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -1013,7 +1013,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1027,7 +1027,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -1049,7 +1049,7 @@ define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1063,7 +1063,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -1084,7 +1084,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-73, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1097,7 +1097,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %ve ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-73, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> @@ -1108,7 +1108,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $102, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1121,7 +1121,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %ve ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $102, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> @@ -1132,7 +1132,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-46, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1145,7 +1145,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %ve ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-46, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> @@ -1164,7 +1164,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, < ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-86, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1177,7 +1177,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %ve ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-86, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> @@ -1197,7 +1197,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1211,7 +1211,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -1224,7 +1224,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1238,7 +1238,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -1251,7 +1251,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1265,7 +1265,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -1287,7 +1287,7 @@ define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1301,7 +1301,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -1322,7 +1322,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -1335,7 +1335,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> @@ -1346,7 +1346,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -1359,7 +1359,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> @@ -1370,7 +1370,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -1383,7 +1383,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> @@ -1402,7 +1402,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: vmovaps %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -1415,7 +1415,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $7, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> @@ -1435,7 +1435,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -1449,7 +1449,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> % ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -1462,7 +1462,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -1476,7 +1476,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> % ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -1489,7 +1489,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -1503,7 +1503,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> % ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -1525,7 +1525,7 @@ define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: vmovaps %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -1539,7 +1539,7 @@ define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> % ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-NEXT: retq %vec2 = load <4 x float>, <4 x float>* %vec2p @@ -1560,7 +1560,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $21, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -1573,7 +1573,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $21, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> @@ -1584,7 +1584,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $82, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -1597,7 +1597,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $82, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> @@ -1608,7 +1608,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -1621,7 +1621,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-126, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> @@ -1640,7 +1640,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-19, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: vmovaps %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -1653,7 +1653,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-19, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> @@ -1673,7 +1673,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $28, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1687,7 +1687,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> % ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $28, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -1700,7 +1700,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-115, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1714,7 +1714,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> % ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-115, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -1727,7 +1727,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1741,7 +1741,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> % ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -1763,7 +1763,7 @@ define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: vmovaps %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -1777,7 +1777,7 @@ define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> % ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x float>, <8 x float>* %vec2p @@ -1798,7 +1798,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12160, %ax # imm = 0xD080 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1811,7 +1811,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %v ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-12160, %ax # imm = 0xD080 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> @@ -1822,7 +1822,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30129, %ax # imm = 0x8A4F -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1835,7 +1835,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %v ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-30129, %ax # imm = 0x8A4F -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> @@ -1846,7 +1846,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-2371, %ax # imm = 0xF6BD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1859,7 +1859,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %v ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-2371, %ax # imm = 0xF6BD -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> @@ -1878,7 +1878,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26006, %ax # imm = 0x9A6A -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: vmovaps %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -1891,7 +1891,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %v ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26006, %ax # imm = 0x9A6A -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> @@ -1911,7 +1911,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %ve ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1925,7 +1925,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -1938,7 +1938,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %ve ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1952,7 +1952,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -1965,7 +1965,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %ve ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -1979,7 +1979,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -2001,7 +2001,7 @@ define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %ve ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $25225, %ax # imm = 0x6289 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: vmovaps %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -2015,7 +2015,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movw $25225, %ax # imm = 0x6289 -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] ; CHECK-NEXT: retq %vec2 = load <16 x float>, <16 x float>* %vec2p @@ -2036,7 +2036,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -2049,7 +2049,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %v ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> @@ -2060,7 +2060,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] ; CHECK-NEXT: vmovapd %xmm2, %xmm0 ; CHECK-NEXT: retq @@ -2073,7 +2073,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %v ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] ; CHECK-NEXT: retq %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3> @@ -2093,7 +2093,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %ve ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -2107,7 +2107,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] ; CHECK-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p @@ -2120,7 +2120,7 @@ define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %ve ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] ; CHECK-NEXT: vmovapd %xmm1, %xmm0 ; CHECK-NEXT: retq @@ -2134,7 +2134,7 @@ define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $2, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] ; CHECK-NEXT: retq %vec2 = load <2 x double>, <2 x double>* %vec2p @@ -2155,7 +2155,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -2168,7 +2168,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %v ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $9, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -2179,7 +2179,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -2192,7 +2192,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %v ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $14, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -2203,7 +2203,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -2216,7 +2216,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %v ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $6, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -2235,7 +2235,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: vmovapd %ymm2, %ymm0 ; CHECK-NEXT: retq @@ -2248,7 +2248,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %v ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] ; CHECK-NEXT: retq %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7> @@ -2268,7 +2268,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %ve ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -2282,7 +2282,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $11, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -2295,7 +2295,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %ve ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -2309,7 +2309,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $12, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -2322,7 +2322,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %ve ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -2336,7 +2336,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -2358,7 +2358,7 @@ define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %ve ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq @@ -2372,7 +2372,7 @@ define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] ; CHECK-NEXT: retq %vec2 = load <4 x double>, <4 x double>* %vec2p @@ -2393,7 +2393,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-27, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -2406,7 +2406,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %v ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-27, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -2417,7 +2417,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-21, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -2430,7 +2430,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %v ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-21, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -2441,7 +2441,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-118, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -2454,7 +2454,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %v ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-118, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -2473,7 +2473,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $100, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: vmovapd %zmm2, %zmm0 ; CHECK-NEXT: retq @@ -2486,7 +2486,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %v ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $100, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; CHECK-NEXT: retq %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> @@ -2506,7 +2506,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %ve ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -2520,7 +2520,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-76, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -2533,7 +2533,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %ve ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $71, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -2547,7 +2547,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1: ; CHECK: # BB#0: ; CHECK-NEXT: movb $71, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -2560,7 +2560,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %ve ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -2574,7 +2574,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-49, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p @@ -2596,7 +2596,7 @@ define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %ve ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-40, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -2610,7 +2610,7 @@ define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3: ; CHECK: # BB#0: ; CHECK-NEXT: movb $-40, %al -; CHECK-NEXT: kmovd %eax, %k1 +; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] ; CHECK-NEXT: retq %vec2 = load <8 x double>, <8 x double>* %vec2p |

