diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll | 264 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll | 8 |
7 files changed, 116 insertions, 204 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1cc09e5c23b..55912a2c3ec 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4632,6 +4632,14 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasLZCNT(); } +bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, + EVT BitcastVT) const { + if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1) + return false; + + return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT); +} + bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const SelectionDAG &DAG) const { // Do not merge to float value size (128 bytes) if no implicit diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 7708f577ba7..1fb7c7ed4e9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1023,6 +1023,8 @@ namespace llvm { return NumElem > 2; } + bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override; + /// Intel processors have a unified instruction and data cache const char * getClearCacheBuiltinName() const override { return nullptr; // nothing to do, move along. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0e56c2d208f..0b31f876b38 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2742,6 +2742,8 @@ let Predicates = [HasAVX512, NoDQI] in { let Predicates = [HasAVX512] in { def : Pat<(v1i1 (load addr:$src)), (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>; + def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), + (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; } let Predicates = [HasAVX512] in { diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll index 5f4b050b863..ef7cf1bc8c9 100644 --- a/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -37186,9 +37186,7 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i6 ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 @@ -37247,9 +37245,7 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -37309,9 +37305,7 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, < ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 ; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 @@ -37532,9 +37526,7 @@ define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 @@ -37593,9 +37585,7 @@ define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, < ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -37655,9 +37645,7 @@ define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, ; ; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 ; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 @@ -37853,10 +37841,8 @@ define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 @@ -37869,8 +37855,8 @@ define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -37903,10 +37889,8 @@ define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -37919,8 +37903,8 @@ define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -37954,10 +37938,8 @@ define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 ; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 @@ -37971,8 +37953,8 @@ define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -38154,10 +38136,8 @@ define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm2, %xmm0, %xmm0 @@ -38173,10 +38153,10 @@ define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp @@ -38210,10 +38190,8 @@ define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqps (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandps %xmm1, %xmm0, %xmm0 @@ -38229,10 +38207,10 @@ define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp @@ -38267,10 +38245,8 @@ define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vbroadcastss (%rsi), %xmm2 ; NoVLX-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0 @@ -38287,10 +38263,10 @@ define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp @@ -40087,9 +40063,7 @@ define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i6 ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 @@ -40124,9 +40098,7 @@ define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 @@ -40162,9 +40134,7 @@ define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, < ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 @@ -40319,9 +40289,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i6 ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 @@ -40366,9 +40334,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 @@ -40414,9 +40380,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, < ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 @@ -40581,9 +40545,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 @@ -40628,9 +40590,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, < ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 @@ -40676,9 +40636,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 @@ -40860,10 +40818,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 @@ -40876,8 +40832,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -40910,10 +40866,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, < ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 @@ -40926,8 +40880,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, < ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -40961,10 +40915,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 @@ -40978,8 +40930,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -41161,10 +41113,8 @@ define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm2, %xmm0, %xmm0 @@ -41180,10 +41130,10 @@ define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp @@ -41217,10 +41167,8 @@ define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, < ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vandpd %xmm1, %xmm0, %xmm0 @@ -41236,10 +41184,10 @@ define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, < ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp @@ -41274,10 +41222,8 @@ define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 @@ -41294,10 +41240,10 @@ define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp @@ -41497,9 +41443,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i6 ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 @@ -41560,9 +41504,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 @@ -41624,9 +41566,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, < ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 ; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 @@ -41858,9 +41798,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 @@ -41921,9 +41859,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, < ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 @@ -41985,9 +41921,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, ; ; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry -; NoVLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 ; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 @@ -42191,10 +42125,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 @@ -42208,8 +42140,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -42243,10 +42175,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 @@ -42260,8 +42190,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -42296,10 +42226,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $64, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $32, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 ; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 @@ -42314,8 +42242,8 @@ define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: kmovw %k0, (%rsp) +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: vzeroupper @@ -42504,10 +42432,8 @@ define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 @@ -42524,10 +42450,10 @@ define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp @@ -42562,10 +42488,8 @@ define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vcmpeqpd (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 @@ -42582,10 +42506,10 @@ define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, < ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp @@ -42621,10 +42545,8 @@ define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp -; NoVLX-NEXT: subq $96, %rsp -; NoVLX-NEXT: movb %dil, {{[0-9]+}}(%rsp) -; NoVLX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; NoVLX-NEXT: kmovw %eax, %k1 +; NoVLX-NEXT: subq $64, %rsp +; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm2 ; NoVLX-NEXT: vcmpeqpd %ymm2, %ymm0, %ymm0 @@ -42642,10 +42564,10 @@ define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 -; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx -; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax +; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll index 8673a0b026d..ba6d7738d4e 100644 --- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll +++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll @@ -45,9 +45,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) { ; AVX512-LABEL: ext_i2_2i64: ; AVX512: # %bb.0: ; AVX512-NEXT: andb $3, %dil -; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kmovd %edi, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq @@ -87,9 +85,7 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) { ; AVX512-LABEL: ext_i4_4i32: ; AVX512: # %bb.0: ; AVX512-NEXT: andb $15, %dil -; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kmovd %edi, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq @@ -240,9 +236,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) { ; AVX512-LABEL: ext_i4_4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: andb $15, %dil -; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kmovd %edi, %k1 ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z} ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll index ce38b4cfdc4..fc91fda3a3c 100644 --- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll +++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll @@ -49,9 +49,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) { ; AVX512F-LABEL: ext_i2_2i64: ; AVX512F: # %bb.0: ; AVX512F-NEXT: andb $3, %dil -; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512F-NEXT: vzeroupper @@ -60,9 +58,7 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) { ; AVX512VLBW-LABEL: ext_i2_2i64: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: andb $3, %dil -; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: kmovd %edi, %k1 ; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} ; AVX512VLBW-NEXT: retq %1 = bitcast i2 %a0 to <2 x i1> @@ -104,9 +100,7 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) { ; AVX512F-LABEL: ext_i4_4i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: andb $15, %dil -; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0 ; AVX512F-NEXT: vzeroupper @@ -115,9 +109,7 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) { ; AVX512VLBW-LABEL: ext_i4_4i32: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: andb $15, %dil -; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: kmovd %edi, %k1 ; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} ; AVX512VLBW-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> @@ -300,9 +292,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) { ; AVX512F-LABEL: ext_i4_4i64: ; AVX512F: # %bb.0: ; AVX512F-NEXT: andb $15, %dil -; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} ; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0 ; AVX512F-NEXT: retq @@ -310,9 +300,7 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) { ; AVX512VLBW-LABEL: ext_i4_4i64: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: andb $15, %dil -; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: kmovd %edi, %k1 ; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z} ; AVX512VLBW-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll index a105c0bfdd0..8af95dfd5b8 100644 --- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll +++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll @@ -43,9 +43,7 @@ define <2 x i1> @bitcast_i2_2i1(i2 zeroext %a0) { ; ; AVX512-LABEL: bitcast_i2_2i1: ; AVX512: # %bb.0: -; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kmovd %edi, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq @@ -86,9 +84,7 @@ define <4 x i1> @bitcast_i4_4i1(i4 zeroext %a0) { ; ; AVX512-LABEL: bitcast_i4_4i1: ; AVX512: # %bb.0: -; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512-NEXT: kmovd %eax, %k1 +; AVX512-NEXT: kmovd %edi, %k1 ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; AVX512-NEXT: retq |

