| author | Craig Topper <craig.topper@intel.com> | 2019-07-02 17:51:02 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-07-02 17:51:02 +0000 |
| commit | cffbaa93b72b307904935c380f90d49d00c7ecdc (patch) | |
| tree | a31e28a995d70ea4c8c32a9287f21210880fe0eb /llvm/lib | |
| parent | 36face4c1df75c1e4e82c3f26b0b98495af9359e (diff) | |
[X86] Add patterns to select (scalar_to_vector (loadf32)) as (V)MOVSSrm instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt.
Do the same for (V)MOVSD. Ultimately, I'd like to look into folding
scalar_to_vector+load into vzload, which would also select as (V)MOVSSrm,
so this change is a step in that direction.
llvm-svn: 364948
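
For illustration, here is a minimal hypothetical IR example of the shape these patterns match (not taken from the commit; the function names are made up). A scalar load inserted into lane 0 of an otherwise-undef vector is combined into (scalar_to_vector (loadf32)) / (scalar_to_vector (loadf64)) in the SelectionDAG, which the new patterns select directly as (V)MOVSSrm / (V)MOVSDrm instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt.

```llvm
; Hypothetical illustration, not part of this commit.
; insertelement of a loaded scalar into lane 0 of an undef vector is
; combined into (scalar_to_vector (load ...)) during DAG construction,
; which the new pattern selects as (V)MOVSSrm.
define <4 x float> @scalar_to_vector_loadf32(float* %p) {
  %f = load float, float* %p
  %v = insertelement <4 x float> undef, float %f, i32 0
  ret <4 x float> %v
}

; Same shape for f64, which now selects as (V)MOVSDrm.
define <2 x double> @scalar_to_vector_loadf64(double* %p) {
  %d = load double, double* %p
  %v = insertelement <2 x double> undef, double %d, i32 0
  ret <2 x double> %v
}
```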
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 4 |
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 29 |
2 files changed, 24 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2cdcb1e1f0e..b2dfc512932 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4312,6 +4312,10 @@ let Predicates = [HasAVX512, OptForSpeed] in {
 }
 
 let Predicates = [HasAVX512] in {
+  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+            (VMOVSSZrm addr:$src)>;
+  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+            (VMOVSDZrm addr:$src)>;
 
   // Represent the same patterns above but in the form they appear for
   // 256-bit types
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c79cf7ade88..56974c44b4d 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -263,6 +263,11 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
 
 // Patterns
 let Predicates = [UseAVX] in {
+  def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+            (VMOVSSrm addr:$src)>;
+  def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+            (VMOVSDrm addr:$src)>;
+
   // Represent the same patterns above but in the form they appear for
   // 256-bit types
   def : Pat<(v8f32 (X86vzload addr:$src)),
@@ -290,17 +295,23 @@ let Predicates = [UseAVX, OptForSize] in {
               (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
 }
 
-let Predicates = [UseSSE1] in {
-  let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
-  // Move scalar to XMM zero-extended, zeroing a VR128 then do a
-  // MOVSS to the lower bits.
-  def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-            (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
-  def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-            (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
-  }
+let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
+// Move scalar to XMM zero-extended, zeroing a VR128 then do a
+// MOVSS to the lower bits.
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+          (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+          (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
 }
 
+let Predicates = [UseSSE2] in
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+          (MOVSDrm addr:$src)>;
+
+let Predicates = [UseSSE1] in
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+          (MOVSSrm addr:$src)>;
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
 //===----------------------------------------------------------------------===//

