 llvm/lib/Target/X86/X86ISelLowering.cpp |  6 ++++++
 llvm/lib/Target/X86/X86InstrAVX512.td   |  8 --------
 llvm/lib/Target/X86/X86InstrSSE.td      | 19 +------------------
 3 files changed, 7 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2bed6c85bec..c4db6932fa8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -534,6 +534,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
                                                       : &X86::FR64RegClass);
 
+    // Disable f32->f64 extload as we can only generate this in one instruction
+    // under optsize. So its easier to pattern match (fpext (load)) for that
+    // case instead of needing to emit 2 instructions for extload in the
+    // non-optsize case.
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+
     for (auto VT : { MVT::f32, MVT::f64 }) {
       // Use ANDPD to simulate FABS.
       setOperationAction(ISD::FABS, VT, Custom);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index fbadd80b242..f428b201adc 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7521,14 +7521,6 @@ def : Pat<(f64 (fpextend (loadf32 addr:$src))),
           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[HasAVX512, OptForSize]>;
 
-def : Pat<(f64 (extloadf32 addr:$src)),
-          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
-          Requires<[HasAVX512, OptForSize]>;
-
-def : Pat<(f64 (extloadf32 addr:$src)),
-          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
-          Requires<[HasAVX512, OptForSpeed]>;
-
 def : Pat<(f32 (fpround FR64X:$src)),
           (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index ade5645d5bf..65849373e91 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1251,13 +1251,6 @@ def : Pat<(f64 (fpextend FR32:$src)),
 def : Pat<(fpextend (loadf32 addr:$src)),
           (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
           Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
-          Requires<[UseAVX, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
-          Requires<[UseAVX, OptForSpeed]>;
-
 let isCodeGenOnly = 1 in {
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
@@ -1265,21 +1258,11 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>;
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
-                   [(set FR64:$dst, (extloadf32 addr:$src))]>,
+                   [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
                    XS, Requires<[UseSSE2, OptForSize]>,
                    Sched<[WriteCvtSS2SD.Folded]>;
 } // isCodeGenOnly = 1
 
-// extload f32 -> f64. This matches load+fpextend because we have a hack in
-// the isel (PreprocessForFPConvert) that can introduce loads after dag
-// combine.
-// Since these loads aren't folded into the fpextend, we have to match it
-// explicitly here.
-def : Pat<(fpextend (loadf32 addr:$src)),
-          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
-
 let hasSideEffects = 0 in {
 def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2),
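
For context, a minimal C++ sketch of the kind of source this change affects; it is not part of the patch, the function name is illustrative, and the expected instruction sequences are an assumption inferred from the retained patterns above rather than verified output.

// Minimal sketch (not part of the patch): an f32 load that is immediately
// widened to f64. The function name is illustrative only.
double widen(const float *p) {
  // With the f32->f64 EXTLOAD marked Expand, SelectionDAG legalization splits
  // any extending load into a plain f32 load plus an fp_extend node, so only
  // the (fpext (load)) form reaches instruction selection.
  //
  // Expected x86 selection (assumption, inferred from the retained patterns):
  //   optsize: cvtss2sd (%rdi), %xmm0   ; load folded, one instruction
  //   default: movss (%rdi), %xmm0      ; separate load, then
  //            cvtss2sd %xmm0, %xmm0    ; convert in registers
  return static_cast<double>(*p);
}

Keeping only the (fpext (load)) form means the optsize-versus-speed decision is made once by the remaining patterns instead of being duplicated across separate extload patterns.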

