Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 123
1 file changed, 123 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0d8ecb91377..7eba559f49a 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2794,6 +2794,129 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
                            (v16i32 VR512:$src))),
                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
 
+let Predicates = [HasVLX] in {
+  // Special patterns for storing subvector extracts of lower 128-bits of 256.
+  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
+  def : Pat<(alignedstore (v2f64 (extract_subvector
+                                  (v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v4f32 (extract_subvector
+                                  (v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v2i64 (extract_subvector
+                                  (v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v4i32 (extract_subvector
+                                  (v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v8i16 (extract_subvector
+                                  (v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v16i8 (extract_subvector
+                                  (v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+
+  def : Pat<(store (v2f64 (extract_subvector
+                           (v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(store (v4f32 (extract_subvector
+                           (v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(store (v2i64 (extract_subvector
+                           (v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(store (v4i32 (extract_subvector
+                           (v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(store (v8i16 (extract_subvector
+                           (v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+  def : Pat<(store (v16i8 (extract_subvector
+                           (v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+
+  // Special patterns for storing subvector extracts of lower 128-bits of 512.
+  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
+  def : Pat<(alignedstore (v2f64 (extract_subvector
+                                  (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v4f32 (extract_subvector
+                                  (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v2i64 (extract_subvector
+                                  (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v4i32 (extract_subvector
+                                  (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v8i16 (extract_subvector
+                                  (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(alignedstore (v16i8 (extract_subvector
+                                  (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+
+  def : Pat<(store (v2f64 (extract_subvector
+                           (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(store (v4f32 (extract_subvector
+                           (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(store (v2i64 (extract_subvector
+                           (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(store (v4i32 (extract_subvector
+                           (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(store (v8i16 (extract_subvector
+                           (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+  def : Pat<(store (v16i8 (extract_subvector
+                           (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+
+  // Special patterns for storing subvector extracts of lower 256-bits of 512.
+  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
+  def : Pat<(alignedstore (v4f64 (extract_subvector
+                                  (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(alignedstore (v8f32 (extract_subvector
+                                  (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(alignedstore (v4i64 (extract_subvector
+                                  (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(alignedstore (v8i32 (extract_subvector
+                                  (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(alignedstore (v16i16 (extract_subvector
+                                   (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(alignedstore (v32i8 (extract_subvector
+                                  (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQA32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+
+  def : Pat<(store (v4f64 (extract_subvector
+                           (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVUPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(store (v8f32 (extract_subvector
+                           (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVUPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(store (v4i64 (extract_subvector
+                           (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(store (v8i32 (extract_subvector
+                           (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(store (v16i16 (extract_subvector
+                            (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+  def : Pat<(store (v32i8 (extract_subvector
+                           (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+            (VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+}
+
 
 // Move Int Doubleword to Packed Double Int
 //
 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
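For context, here is a minimal C sketch of source code that produces the DAGs these patterns match. It is not part of the commit: the function names are hypothetical, and it assumes a compiler targeting AVX-512 with VLX (e.g. -mavx512vl). Each cast intrinsic lowers to an extract_subvector at index 0, so with the patterns above each store can be emitted as a plain vmovups of an XMM/YMM register instead of a vextractf128-style instruction with a memory operand.

#include <immintrin.h>

/* Hypothetical examples, one per pattern group above. */

/* Lower 128 bits of a 256-bit vector (the HasVLX 256->128 group). */
void store_low128_of_256(float *dst, __m256 v) {
  /* cast = extract_subvector at index 0; no shuffle instruction needed */
  _mm_storeu_ps(dst, _mm256_castps256_ps128(v));
}

/* Lower 128 bits of a 512-bit vector (the 512->128 group). */
void store_low128_of_512(float *dst, __m512 v) {
  _mm_storeu_ps(dst, _mm512_castps512_ps128(v));
}

/* Lower 256 bits of a 512-bit vector (the 512->256 group). */
void store_low256_of_512(float *dst, __m512 v) {
  _mm256_storeu_ps(dst, _mm512_castps512_ps256(v));
}

Using the aligned-store intrinsics (_mm_store_ps and friends) on suitably aligned pointers would instead exercise the alignedstore patterns, selecting the VMOVAPS/VMOVAPD/VMOVDQA forms.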