author     Craig Topper <craig.topper@intel.com>    2017-08-25 23:34:59 +0000
committer  Craig Topper <craig.topper@intel.com>    2017-08-25 23:34:59 +0000
commit     d27386a9edfa0d1c33bf6bb4ba704ba1e4bee7c7 (patch)
tree       53cd1684a80521cbea8b8513d3c03acbf5bdd370 /llvm/lib
parent     b89dbf02208af035c868f42a50c8115abf0d1971 (diff)
[AVX512] Add patterns to use masked moves to implement masked extract_subvector of the lowest subvector.
This only supports 32- and 64-bit element sizes for now, but we could probably do 16- and 8-bit elements with BWI.
llvm-svn: 311821
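
For illustration, here is C code that produces the DAG these patterns match: a masked extract of the lowest 128-bit subvector, i.e. extract index 0. Because the low subvector is just a subregister of the wider register, the extract itself costs nothing and the whole operation can collapse into a single masked move. This is a minimal sketch, assuming AVX512F+VL; the intrinsics are the standard ones from immintrin.h, but the function names and the exact codegen noted in the comments are illustrative, not part of this patch.

    #include <immintrin.h>

    // Compile with -mavx512f -mavx512vl.

    // Merge-masked extract of the low 128 bits of a 256-bit vector.
    // With an index of 0, the vselect + extract_subvector DAG should
    // match the new VMOVDQA32Z128rrk pattern, so this is expected to
    // lower to a masked register move (vmovdqa32 %xmm, %xmm {%k1})
    // rather than a masked vextracti32x4.
    __m128i extract_lo128_merge(__m128i src, __mmask8 k, __m256i a) {
      return _mm256_mask_extracti32x4_epi32(src, k, a, 0);
    }

    // Zero-masked variant, matching the immAllZerosV (rrkz) patterns;
    // expected to become vmovdqa32 %xmm, %xmm {%k1} {z}.
    __m128i extract_lo128_zero(__mmask8 k, __m256i a) {
      return _mm256_maskz_extracti32x4_epi32(k, a, 0);
    }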
Diffstat (limited to 'llvm/lib')
 llvm/lib/Target/X86/X86InstrAVX512.td | 133
 1 file changed, 133 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 8f925e4ca55..f73716e556f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3739,6 +3739,139 @@ let Predicates = [HasVLX] in {
                   (VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
 }
 
+let Predicates = [HasVLX] in {
+// A masked extract from the first 128-bits of a 256-bit vector can be
+// implemented with masked move.
+def : Pat<(v2i64 (vselect VK2WM:$mask,
+                 (extract_subvector (v4i64 VR256X:$src), (iPTR 0)),
+                 VR128X:$src0)),
+          (v2i64 (VMOVDQA64Z128rrk VR128X:$src0, VK2WM:$mask,
+                  (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))>;
+def : Pat<(v4i32 (vselect VK4WM:$mask,
+                 (extract_subvector (v8i32 VR256X:$src), (iPTR 0)),
+                 VR128X:$src0)),
+          (v4i32 (VMOVDQA32Z128rrk VR128X:$src0, VK4WM:$mask,
+                  (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))>;
+def : Pat<(v2f64 (vselect VK2WM:$mask,
+                 (extract_subvector (v4f64 VR256X:$src), (iPTR 0)),
+                 VR128X:$src0)),
+          (v2f64 (VMOVAPDZ128rrk VR128X:$src0, VK2WM:$mask,
+                  (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
+def : Pat<(v4f32 (vselect VK4WM:$mask,
+                 (extract_subvector (v8f32 VR256X:$src), (iPTR 0)),
+                 VR128X:$src0)),
+          (v4f32 (VMOVAPSZ128rrk VR128X:$src0, VK4WM:$mask,
+                  (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
+
+def : Pat<(v2i64 (vselect VK2WM:$mask,
+                 (extract_subvector (v4i64 VR256X:$src), (iPTR 0)),
+                 (bitconvert (v4i32 immAllZerosV)))),
+          (v2i64 (VMOVDQA64Z128rrkz VK2WM:$mask,
+                  (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))>;
+def : Pat<(v4i32 (vselect VK4WM:$mask,
+                 (extract_subvector (v8i32 VR256X:$src), (iPTR 0)),
+                 (bitconvert (v4i32 immAllZerosV)))),
+          (v4i32 (VMOVDQA32Z128rrkz VK4WM:$mask,
+                  (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))>;
+def : Pat<(v2f64 (vselect VK2WM:$mask,
+                 (extract_subvector (v4f64 VR256X:$src), (iPTR 0)),
+                 (bitconvert (v4i32 immAllZerosV)))),
+          (v2f64 (VMOVAPDZ128rrkz VK2WM:$mask,
+                  (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
+def : Pat<(v4f32 (vselect VK4WM:$mask,
+                 (extract_subvector (v8f32 VR256X:$src), (iPTR 0)),
+                 (bitconvert (v4i32 immAllZerosV)))),
+          (v4f32 (VMOVAPSZ128rrkz VK4WM:$mask,
+                  (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
+
+// A masked extract from the first 128-bits of a 512-bit vector can be
+// implemented with masked move.
+def : Pat<(v2i64 (vselect VK2WM:$mask,
+                 (extract_subvector (v8i64 VR512:$src), (iPTR 0)),
+                 VR128X:$src0)),
+          (v2i64 (VMOVDQA64Z128rrk VR128X:$src0, VK2WM:$mask,
+                  (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))>;
+def : Pat<(v4i32 (vselect VK4WM:$mask,
+                 (extract_subvector (v16i32 VR512:$src), (iPTR 0)),
+                 VR128X:$src0)),
+          (v4i32 (VMOVDQA32Z128rrk VR128X:$src0, VK4WM:$mask,
+                  (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))>;
+def : Pat<(v2f64 (vselect VK2WM:$mask,
+                 (extract_subvector (v8f64 VR512:$src), (iPTR 0)),
+                 VR128X:$src0)),
+          (v2f64 (VMOVAPDZ128rrk VR128X:$src0, VK2WM:$mask,
+                  (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
+def : Pat<(v4f32 (vselect VK4WM:$mask,
+                 (extract_subvector (v16f32 VR512:$src), (iPTR 0)),
+                 VR128X:$src0)),
+          (v4f32 (VMOVAPSZ128rrk VR128X:$src0, VK4WM:$mask,
+                  (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
+
+def : Pat<(v2i64 (vselect VK2WM:$mask,
+                 (extract_subvector (v8i64 VR512:$src), (iPTR 0)),
+                 (bitconvert (v4i32 immAllZerosV)))),
+          (v2i64 (VMOVDQA64Z128rrkz VK2WM:$mask,
+                  (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))>;
+def : Pat<(v4i32 (vselect VK4WM:$mask,
+                 (extract_subvector (v16i32 VR512:$src), (iPTR 0)),
+                 (bitconvert (v4i32 immAllZerosV)))),
+          (v4i32 (VMOVDQA32Z128rrkz VK4WM:$mask,
+                  (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))>;
+def : Pat<(v2f64 (vselect VK2WM:$mask,
+                 (extract_subvector (v8f64 VR512:$src), (iPTR 0)),
+                 (bitconvert (v4i32 immAllZerosV)))),
+          (v2f64 (VMOVAPDZ128rrkz VK2WM:$mask,
+                  (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
+def : Pat<(v4f32 (vselect VK4WM:$mask,
+                 (extract_subvector (v16f32 VR512:$src), (iPTR 0)),
+                 (bitconvert (v4i32 immAllZerosV)))),
+          (v4f32 (VMOVAPSZ128rrkz VK4WM:$mask,
+                  (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
+
+// A masked extract from the first 256-bits of a 512-bit vector can be
+// implemented with masked move.
+def : Pat<(v4i64 (vselect VK4WM:$mask,
+                 (extract_subvector (v8i64 VR512:$src), (iPTR 0)),
+                 VR256X:$src0)),
+          (v4i64 (VMOVDQA64Z256rrk VR256X:$src0, VK4WM:$mask,
+                  (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)))>;
+def : Pat<(v8i32 (vselect VK8WM:$mask,
+                 (extract_subvector (v16i32 VR512:$src), (iPTR 0)),
+                 VR256X:$src0)),
+          (v8i32 (VMOVDQA32Z256rrk VR256X:$src0, VK8WM:$mask,
+                  (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)))>;
+def : Pat<(v4f64 (vselect VK4WM:$mask,
+                 (extract_subvector (v8f64 VR512:$src), (iPTR 0)),
+                 VR256X:$src0)),
+          (v4f64 (VMOVAPDZ256rrk VR256X:$src0, VK4WM:$mask,
+                  (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)))>;
+def : Pat<(v8f32 (vselect VK8WM:$mask,
+                 (extract_subvector (v16f32 VR512:$src), (iPTR 0)),
+                 VR256X:$src0)),
+          (v8f32 (VMOVAPSZ256rrk VR256X:$src0, VK8WM:$mask,
+                  (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)))>;
+
+def : Pat<(v4i64 (vselect VK4WM:$mask,
+                 (extract_subvector (v8i64 VR512:$src), (iPTR 0)),
+                 (bitconvert (v8i32 immAllZerosV)))),
+          (v4i64 (VMOVDQA64Z256rrkz VK4WM:$mask,
+                  (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)))>;
+def : Pat<(v8i32 (vselect VK8WM:$mask,
+                 (extract_subvector (v16i32 VR512:$src), (iPTR 0)),
+                 (bitconvert (v8i32 immAllZerosV)))),
+          (v8i32 (VMOVDQA32Z256rrkz VK8WM:$mask,
+                  (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)))>;
+def : Pat<(v4f64 (vselect VK4WM:$mask,
+                 (extract_subvector (v8f64 VR512:$src), (iPTR 0)),
+                 (bitconvert (v8i32 immAllZerosV)))),
+          (v4f64 (VMOVAPDZ256rrkz VK4WM:$mask,
+                  (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)))>;
+def : Pat<(v8f32 (vselect VK8WM:$mask,
+                 (extract_subvector (v16f32 VR512:$src), (iPTR 0)),
+                 (bitconvert (v8i32 immAllZerosV)))),
+          (v8f32 (VMOVAPSZ256rrkz VK8WM:$mask,
+                  (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)))>;
+}
 
 // Move Int Doubleword to Packed Double Int
 //
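
The 512-bit groups can be exercised the same way. A minimal sketch, again assuming AVX512F and using an illustrative function name; the codegen in the comment is the expected effect of the patterns above, not verified output:

    #include <immintrin.h>

    // Merge-masked extract of the low 256 bits of a 512-bit vector.
    // With index 0, this should match the VMOVDQA64Z256rrk pattern and
    // emit a masked vmovdqa64 %ymm move instead of a masked
    // vextracti64x4.
    __m256i extract_lo256_merge(__m256i src, __mmask8 k, __m512i a) {
      return _mm512_mask_extracti64x4_epi64(src, k, a, 0);
    }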