diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 15 |
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 2 |
2 files changed, 13 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0b8cebb2e8b..7b1cc56b43f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3290,13 +3290,16 @@ multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr, string Name, bit NoMRPattern = 0> { let Predicates = [prd] in defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore, - masked_store_aligned512, Name#Z>, EVEX_V512; + masked_store_aligned512, Name#Z, + NoMRPattern>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore, - masked_store_aligned256, Name#Z256>, EVEX_V256; + masked_store_aligned256, Name#Z256, + NoMRPattern>, EVEX_V256; defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore, - masked_store_aligned128, Name#Z128>, EVEX_V128; + masked_store_aligned128, Name#Z128, + NoMRPattern>, EVEX_V128; } } @@ -3450,6 +3453,8 @@ let Predicates = [HasBWI, NoVLX] in { let Predicates = [HasAVX512] in { // 512-bit store. + def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst), + (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst), (VMOVDQA64Zmr addr:$dst, VR512:$src)>; def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst), @@ -3464,6 +3469,8 @@ let Predicates = [HasAVX512] in { let Predicates = [HasVLX] in { // 128-bit store. + def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst), + (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst), (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>; def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst), @@ -3476,6 +3483,8 @@ let Predicates = [HasVLX] in { (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>; // 256-bit store. 
+ def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst), + (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst), (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>; def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst), diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index bd6618f0e61..11157bd3660 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -341,7 +341,7 @@ define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32 ; CHECK: ## %bb.0: ; CHECK-NEXT: kmovw %edx, %k1 ; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) {%k1} -; CHECK-NEXT: vmovdqa32 %zmm0, (%rsi) +; CHECK-NEXT: vmovdqa64 %zmm0, (%rsi) ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2) call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1) |