author     Craig Topper <craig.topper@intel.com>    2018-05-17 05:41:11 +0000
committer  Craig Topper <craig.topper@intel.com>    2018-05-17 05:41:11 +0000
commit     a2c526471850ad789930ab6aa631c307fab2e11a (patch)
tree       360f0f6db3408f3e1748fdd1166835c57a269e76
parent     121078bee7c25566b524e8c182f00dbb23b19fbb (diff)
[X86] Add OptForSize to a couple load folding patterns. Remove some bad FIXME comments.
The FIXME comments suggested matching the load-folding (rm) form only when optimizing for size, to avoid a partial xmm update. But these instructions take their input in a GPR when the load isn't folded, so leaving the load unfolded doesn't prevent a partial xmm update either.
llvm-svn: 332573
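
As a rough illustration (not part of the patch; the instruction choice and AT&T-syntax operands are assumptions), consider a GPR-source conversion such as CVTSI2SD: the destination xmm register is only partially written whether or not the load is folded.

    cvtsi2sdq (%rdi), %xmm0    # folded load: writes only the low 64 bits of %xmm0
    movq      (%rdi), %rax     # unfolded: the source comes in through a GPR...
    cvtsi2sdq %rax, %xmm0      # ...and the convert still writes only the low 64 bits of %xmm0

Either form leaves the upper bits of %xmm0 untouched, so gating the folded form on OptForSize would not avoid the partial update. That differs from the CVTSS2SD fpextend patterns in the diff below, where the OptForSpeed path loads through MOVSSrm (a full xmm write) before converting, which is why the load-folding patterns there are the ones gated on OptForSize.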
-rw-r--r--   llvm/lib/Target/X86/X86InstrAVX512.td |  2
-rw-r--r--   llvm/lib/Target/X86/X86InstrSSE.td    | 15
2 files changed, 7 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 1a3e3f022ab..5bb2521a16c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7194,7 +7194,7 @@ def : Pat<(f64 (fpextend FR32X:$src)),
           Requires<[HasAVX512]>;
 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
-          Requires<[HasAVX512]>;
+          Requires<[HasAVX512, OptForSize]>;
 
 def : Pat<(f64 (extloadf32 addr:$src)),
           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 438fb84b094..22bf465a2b2 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -897,8 +897,6 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
 // SSE 1 & 2 - Conversion Instructions
 //===----------------------------------------------------------------------===//
 
-// FIXME: We probably want to match the rm form only when optimizing for
-// size, to avoid false depenendecies (see sse_fp_unop_s for details)
 multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                        string asm, X86FoldableSchedWrite sched> {
@@ -925,8 +923,6 @@ let hasSideEffects = 0 in {
 }
 }
 
-// FIXME: We probably want to match the rm form only when optimizing for
-// size, to avoid false depenendecies (see sse_fp_unop_s for details)
 multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                           X86MemOperand x86memop, string asm,
                           X86FoldableSchedWrite sched> {
@@ -1301,24 +1297,25 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
 
 // Convert scalar single to scalar double
 // SSE2 instructions with XS prefix
-let hasSideEffects = 0, Predicates = [UseAVX] in {
+let hasSideEffects = 0 in {
 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                     (ins FR64:$src1, FR32:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>, XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
-                    Sched<[WriteCvtSS2SD]>;
+                    Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>;
 let mayLoad = 1 in
 def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                     (ins FR64:$src1, f32mem:$src2),
                     "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     []>, XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable,
-                    Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>;
+                    Sched<[WriteCvtSS2SD.Folded, ReadAfterLd]>,
+                    Requires<[UseAVX, OptForSize]>;
 }
 
 def : Pat<(f64 (fpextend FR32:$src)),
           (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
 def : Pat<(fpextend (loadf32 addr:$src)),
-          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
+          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, OptForSize]>;
 
 def : Pat<(extloadf32 addr:$src),
           (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
@@ -1343,7 +1340,7 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
 // Since these loads aren't folded into the fpextend, we have to match it
 // explicitly here.
 def : Pat<(fpextend (loadf32 addr:$src)),
-          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
+          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2, OptForSize]>;
 def : Pat<(extloadf32 addr:$src),
           (CVTSS2SDrr (MOVSSrm addr:$src))>,
           Requires<[UseSSE2, OptForSpeed]>;