| author | Craig Topper <craig.topper@intel.com> | 2019-09-26 22:23:09 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-09-26 22:23:09 +0000 |
| commit | c89872497457676bbbb564155cc669d587614d00 (patch) | |
| tree | 1bbcde758bae019e3664a74514b7d33ae27af320 /llvm/lib/Target | |
| parent | f98d2c099a45f323a5cc45eb4f38865044090a8b (diff) | |
| download | bcm5719-llvm-c89872497457676bbbb564155cc669d587614d00.tar.gz bcm5719-llvm-c89872497457676bbbb564155cc669d587614d00.zip | |
[X86] Add CodeGenOnly instructions for (f32 (X86selects $mask, (loadf32 addr), fp32imm0)) to use masked MOVSS from memory.
Similarly for f64, and for the case of a non-zero passthru value.
Previously we did not try to fold the load at all. Using a
CodeGenOnly instruction allows us to use FR32X/FR64X as the
register class, avoiding a chain of COPY_TO_REGCLASS copies.
llvm-svn: 373021
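For illustration, here is a hedged LLVM IR sketch (the function names and exact IR are mine, not taken from this commit's tests) of the kind of input that reaches these patterns: a scalar load selected against zero, or against a passthru value, under a single k-mask bit. On AVX-512 targets a select like this typically lowers to the (X86selects $mask, (loadf32/loadf64 addr), ...) node shape that the new *_alt patterns can fold into a single masked load.

```llvm
; Zero-passthru (fp32imm0) case, intended to exercise the new
; VMOVSSZrmkz_alt pattern: select a loaded f32 against +0.0 under mask bit 0.
define float @maskz_load_f32(float* %p, i8 %k) {
entry:
  %kvec = bitcast i8 %k to <8 x i1>
  %m    = extractelement <8 x i1> %kvec, i64 0
  %v    = load float, float* %p, align 4
  %sel  = select i1 %m, float %v, float 0.000000e+00
  ret float %sel
}

; Merge-masking case with a non-zero passthru, intended to exercise
; the new VMOVSSZrmk_alt pattern.
define float @mask_load_f32(float %passthru, float* %p, i8 %k) {
entry:
  %kvec = bitcast i8 %k to <8 x i1>
  %m    = extractelement <8 x i1> %kvec, i64 0
  %v    = load float, float* %p, align 4
  %sel  = select i1 %m, float %v, float %passthru
  ret float %sel
}
```

With the new patterns the load is folded into the masked VMOVSS itself, rather than being emitted as a separate load followed by a masked register-to-register move and the associated COPY_TO_REGCLASS copies; the f64 variants behave the same way via VMOVSD.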
Diffstat (limited to 'llvm/lib/Target')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 24 |

1 file changed, 23 insertions(+), 1 deletion(-)
```diff
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index e56e42001e9..f9836067214 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3958,6 +3958,18 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
                               "$dst {${mask}} {z}, $src}"),
               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
+  let isCodeGenOnly = 1 in {
+  def rmk_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
+             (ins _.FRC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
+             !strconcat(asm, "\t{$src, $dst {${mask}}|",
+                             "$dst {${mask}}, $src}"),
+             [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
+  def rmkz_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst),
+             (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
+             !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
+                             "$dst {${mask}} {z}, $src}"),
+             [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
+  }
   }
   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
@@ -4222,16 +4234,26 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
 
+def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
+          (VMOVSSZrmk_alt FR32X:$src0, VK1WM:$mask, addr:$src)>;
+def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
+          (VMOVSSZrmkz_alt VK1WM:$mask, addr:$src)>;
+
 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
 
-def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
+def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
 
+def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
+          (VMOVSDZrmk_alt FR64X:$src0, VK1WM:$mask, addr:$src)>;
+def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
+          (VMOVSDZrmkz_alt VK1WM:$mask, addr:$src)>;
+
 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                            (ins VR128X:$src1, VR128X:$src2),
```

