Diffstat (limited to 'llvm/lib/Target/X86/X86InstrAVX512.td')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td | 123
1 file changed, 123 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0d8ecb91377..7eba559f49a 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2794,6 +2794,129 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
+let Predicates = [HasVLX] in {
+  // Special patterns for storing subvector extracts of lower 128-bits of 256.
+  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr.
+ def : Pat<(alignedstore (v2f64 (extract_subvector
+ (v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4f32 (extract_subvector
+ (v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v2i64 (extract_subvector
+ (v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4i32 (extract_subvector
+ (v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v8i16 (extract_subvector
+ (v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v16i8 (extract_subvector
+ (v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+
+ def : Pat<(store (v2f64 (extract_subvector
+ (v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(store (v4f32 (extract_subvector
+ (v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(store (v2i64 (extract_subvector
+ (v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(store (v4i32 (extract_subvector
+ (v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(store (v8i16 (extract_subvector
+ (v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+ def : Pat<(store (v16i8 (extract_subvector
+ (v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
+
+  // Special patterns for storing subvector extracts of lower 128-bits of 512.
+  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF32x4mr.
+ def : Pat<(alignedstore (v2f64 (extract_subvector
+ (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4f32 (extract_subvector
+ (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v2i64 (extract_subvector
+ (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v4i32 (extract_subvector
+ (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v8i16 (extract_subvector
+ (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(alignedstore (v16i8 (extract_subvector
+ (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+
+ def : Pat<(store (v2f64 (extract_subvector
+ (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(store (v4f32 (extract_subvector
+ (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(store (v2i64 (extract_subvector
+ (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(store (v4i32 (extract_subvector
+ (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(store (v8i16 (extract_subvector
+ (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+ def : Pat<(store (v16i8 (extract_subvector
+ (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
+
+  // Special patterns for storing subvector extracts of lower 256-bits of 512.
+  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF64x4mr.
+ def : Pat<(alignedstore (v4f64 (extract_subvector
+ (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(alignedstore (v8f32 (extract_subvector
+ (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(alignedstore (v4i64 (extract_subvector
+ (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(alignedstore (v8i32 (extract_subvector
+ (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(alignedstore (v16i16 (extract_subvector
+ (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(alignedstore (v32i8 (extract_subvector
+ (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQA32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+
+ def : Pat<(store (v4f64 (extract_subvector
+ (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(store (v8f32 (extract_subvector
+ (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVUPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(store (v4i64 (extract_subvector
+ (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(store (v8i32 (extract_subvector
+ (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(store (v16i16 (extract_subvector
+ (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+ def : Pat<(store (v32i8 (extract_subvector
+ (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+ (VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
+}
+
+
// Move Int Doubleword to Packed Double Int
//
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
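
Illustrative only, not part of the diff above: a minimal C sketch of the stores these patterns target (function names are hypothetical; assumes an AVX-512VL target, e.g. clang -O2 -mavx512vl). Each cast-then-store extracts the low subvector at offset 0, so with the patterns in place it should lower to a single vmovups of the xmm/ymm subregister instead of an extract-store.

#include <immintrin.h>

/* Low 128 bits of a 256-bit vector: matches the first group of patterns. */
void store_low128_of_256(float *dst, __m256 v) {
  _mm_storeu_ps(dst, _mm256_castps256_ps128(v));
}

/* Low 128 bits of a 512-bit vector: matches the second group. */
void store_low128_of_512(__m128i *dst, __m512i v) {
  _mm_storeu_si128(dst, _mm512_castsi512_si128(v));
}

/* Low 256 bits of a 512-bit vector: matches the third group. */
void store_low256_of_512(double *dst, __m512d v) {
  _mm256_storeu_pd(dst, _mm512_castpd512_pd256(v));
}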