Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td       16 ++++++++++++++++
-rw-r--r--  llvm/test/CodeGen/X86/avx512-load-store.ll   4 ----
-rw-r--r--  llvm/test/CodeGen/X86/avx512-select.ll       2 --
3 files changed, 16 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2b5bbc1c094..079116353bc 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4002,10 +4002,26 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
 
+def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
+                           (f32 FR32X:$src1), (f32 FR32X:$src2))),
+          (COPY_TO_REGCLASS
+           (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
+                                               GR8:$mask, sub_8bit)), VK1WM),
+                       (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+
 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
           (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
 
+def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
+                           (f64 FR64X:$src1), (f64 FR64X:$src2))),
+          (COPY_TO_REGCLASS
+           (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
+                                               GR8:$mask, sub_8bit)), VK1WM),
+                       (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+
 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
           (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
diff --git a/llvm/test/CodeGen/X86/avx512-load-store.ll b/llvm/test/CodeGen/X86/avx512-load-store.ll
index 4fd985bf24c..e755e96792e 100644
--- a/llvm/test/CodeGen/X86/avx512-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx512-load-store.ll
@@ -12,7 +12,6 @@ define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x
 ; CHECK32-LABEL: test_mm_mask_move_ss:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovss %xmm2, %xmm0, %xmm0 {%k1}
 ; CHECK32-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -37,7 +36,6 @@ define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4
 ; CHECK32-LABEL: test_mm_maskz_move_ss:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
 ; CHECK32-NEXT:    vmovss %xmm1, %xmm0, %xmm2 {%k1}
@@ -62,7 +60,6 @@ define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2
 ; CHECK32-LABEL: test_mm_mask_move_sd:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovsd %xmm2, %xmm0, %xmm0 {%k1}
 ; CHECK32-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
@@ -87,7 +84,6 @@ define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <
 ; CHECK32-LABEL: test_mm_maskz_move_sd:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; CHECK32-NEXT:    vmovsd %xmm1, %xmm0, %xmm2 {%k1}
diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll
index 0b60f6e3524..cfab98f8367 100644
--- a/llvm/test/CodeGen/X86/avx512-select.ll
+++ b/llvm/test/CodeGen/X86/avx512-select.ll
@@ -289,7 +289,6 @@ define double @pr30561_f64(double %b, double %a, i1 %c) {
 ;
 ; X64-LABEL: pr30561_f64:
 ; X64:       # BB#0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
 ; X64-NEXT:    retq
@@ -309,7 +308,6 @@ define float @pr30561_f32(float %b, float %a, i1 %c) {
 ;
 ; X64-LABEL: pr30561_f32:
 ; X64:       # BB#0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; X64-NEXT:    retq
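
For context, the pr30561 tests above show the shape of input the two new patterns target: a scalar floating-point select on an i1 condition. A minimal IR sketch follows (the function body is assumed and modeled on pr30561_f64; only its signature appears in the hunk context above):

; Scalar f64 select on an i1 condition. Before this change, the
; condition byte was masked with an explicit `andb $1` before being
; moved into a k-register.
define double @pr30561_f64(double %b, double %a, i1 %c) {
  %cond = select i1 %c, double %a, double %b
  ret double %cond
}

With the new patterns, an AVX-512 codegen run (flags assumed, e.g. llc -mattr=+avx512f) selects the masked move directly, matching the updated CHECK lines:

  kmovw %edi, %k1
  vmovsd %xmm1, %xmm0, %xmm0 {%k1}
  retq

The `andb $1` is redundant here because the masked scalar move consults only bit 0 of %k1, which is exactly the bit the `and` would have preserved; the added patterns fold the (and GR8:$mask, 1) into the VMOVSSZrrk/VMOVSDZrrk selection instead of emitting it.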

