author    Craig Topper <craig.topper@intel.com>  2019-07-02 17:51:02 +0000
committer Craig Topper <craig.topper@intel.com>  2019-07-02 17:51:02 +0000
commit    cffbaa93b72b307904935c380f90d49d00c7ecdc (patch)
tree      a31e28a995d70ea4c8c32a9287f21210880fe0eb /llvm/lib
parent    36face4c1df75c1e4e82c3f26b0b98495af9359e (diff)
[X86] Add patterns to select (scalar_to_vector (loadf32)) as (V)MOVSSrm instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt.
Similar for (V)MOVSD. Ultimately, I'd like to see about folding scalar_to_vector+load to vzload, which would select as (V)MOVSSrm, so this change is a step closer to that.

llvm-svn: 364948
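For context, a minimal IR sketch (function name hypothetical, not part of the commit) of code that exercises these patterns: an insertelement of a loaded float into lane 0 of an undef vector, which SelectionDAG lowers to (scalar_to_vector (loadf32)):

    define <4 x float> @load_scalar(float* %p) {
      ; Load a single float and place it in element 0 of a v4f32.
      ; With the new patterns this selects directly to (V)MOVSSrm
      ; instead of COPY_TO_REGCLASS + (V)MOVSSrm_alt.
      %f = load float, float* %p
      %v = insertelement <4 x float> undef, float %f, i32 0
      ret <4 x float> %v
    }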
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td   4
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td     29
2 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 2cdcb1e1f0e..b2dfc512932 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4312,6 +4312,10 @@ let Predicates = [HasAVX512, OptForSpeed] in {
}
let Predicates = [HasAVX512] in {
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (VMOVSSZrm addr:$src)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (VMOVSDZrm addr:$src)>;
// Represent the same patterns above but in the form they appear for
// 256-bit types
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c79cf7ade88..56974c44b4d 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -263,6 +263,11 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
// Patterns
let Predicates = [UseAVX] in {
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (VMOVSSrm addr:$src)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (VMOVSDrm addr:$src)>;
+
// Represent the same patterns above but in the form they appear for
// 256-bit types
def : Pat<(v8f32 (X86vzload addr:$src)),
@@ -290,17 +295,23 @@ let Predicates = [UseAVX, OptForSize] in {
(v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)))), sub_xmm)>;
}
-let Predicates = [UseSSE1] in {
- let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVSS to the lower bits.
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
- }
+let Predicates = [UseSSE1, NoSSE41_Or_OptForSize] in {
+// Move scalar to XMM zero-extended, zeroing a VR128 then do a
+// MOVSS to the lower bits.
+def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
+def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}
+let Predicates = [UseSSE2] in
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (MOVSDrm addr:$src)>;
+
+let Predicates = [UseSSE1] in
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (MOVSSrm addr:$src)>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//