summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorGuy Blank <guy.blank@intel.com>2017-07-31 08:26:14 +0000
committerGuy Blank <guy.blank@intel.com>2017-07-31 08:26:14 +0000
commitb169d56dc36ada9aa137763fa50056790513ee7a (patch)
tree1496d61315a8738d3b978929ec735abb821a2fce /llvm/lib/Target
parente9a5e7e407816f338b2aba97a5b92f729986556b (diff)
downloadbcm5719-llvm-b169d56dc36ada9aa137763fa50056790513ee7a.tar.gz
bcm5719-llvm-b169d56dc36ada9aa137763fa50056790513ee7a.zip
[X86][AVX512] Add masked MOVS[S|D] patterns
Added patterns that recognize that an AND with 1 on the mask of a scalar masked move is not needed, since only the lowest bit of the mask is relevant for the instruction. Differential Revision: https://reviews.llvm.org/D35897 llvm-svn: 309546
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td16
1 files changed, 16 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2b5bbc1c094..079116353bc 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4002,10 +4002,26 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
+def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))), // Match an f32 scalar select whose mask is (GR8:$mask & 1); the AND is not reproduced in the output below.
+ (f32 FR32X:$src1), (f32 FR32X:$src2))),
+ (COPY_TO_REGCLASS // The VMOVSSZrrk result is copied back to FR32X (see the trailing operand).
+ (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), // First operand: $src2 copied to VR128X. NOTE(review): presumably the pass-through value used when the mask bit is clear -- confirm against the VMOVSSZrrk definition.
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), // Insert the raw (un-ANDed) GR8 mask into the sub_8bit subregister of an otherwise undefined i32 ...
+ GR8:$mask, sub_8bit)), VK1WM), // ... and copy that to VK1WM; per the commit message only the low mask bit is relevant to the instruction.
+ (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; // Remaining operands: an undefined v4f32 and $src1.
+
// Match an f32 scalar select (X86selects) whose mask is already a VK1WM
// value and emit VMOVSSZrrk. The operands are passed in this order: $src2
// (copied to VR128X), $mask, an undefined v4f32, and $src1; the result is
// copied back to FR32X.
// NOTE(review): presumably $src2 is the pass-through value kept when the mask
// bit is clear -- confirm against the VMOVSSZrrk definition.
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))), // Match an f64 scalar select whose mask is (GR8:$mask & 1); the AND is not reproduced in the output below.
+ (f64 FR64X:$src1), (f64 FR64X:$src2))),
+ (COPY_TO_REGCLASS // The VMOVSDZrrk result is copied back to FR64X (see the trailing operand).
+ (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), // First operand: $src2 copied to VR128X. NOTE(review): presumably the pass-through value used when the mask bit is clear -- confirm against the VMOVSDZrrk definition.
+ (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), // Insert the raw (un-ANDed) GR8 mask into the sub_8bit subregister of an otherwise undefined i32 ...
+ GR8:$mask, sub_8bit)), VK1WM), // ... and copy that to VK1WM; per the commit message only the low mask bit is relevant to the instruction.
+ (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; // Remaining operands: an undefined v2f64 and $src1.
+
// Match an f64 scalar select (X86selects) whose mask is already a VK1WM
// value and emit VMOVSDZrrk. The operands are passed in this order: $src2
// (copied to VR128X), $mask, an undefined v2f64, and $src1; the result is
// copied back to FR64X.
// NOTE(review): presumably $src2 is the pass-through value kept when the mask
// bit is clear -- confirm against the VMOVSDZrrk definition.
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
OpenPOWER on IntegriCloud