From 1ce3b82dea8eb35e77974fc9d97f9a08c690c53d Mon Sep 17 00:00:00 2001
From: Clement Courbet <courbet@google.com>
Date: Fri, 21 Apr 2017 09:20:39 +0000
Subject: X86 memcpy: use REPMOVSB instead of REPMOVS{Q,D,W} for inline copies
 when the subtarget has fast strings.

This has two advantages:
  - Speed is improved. For example, on Haswell throughput improvements increase
    linearly with size from 256 to 512 bytes, after which they plateau:
    (e.g. 1% for 260 bytes, 25% for 400 bytes, 40% for 508 bytes).
  - Code is much smaller (no need to handle boundaries).

llvm-svn: 300957
---
 llvm/lib/Target/X86/X86Subtarget.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'llvm/lib/Target/X86/X86Subtarget.h')

diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index d0d88d32694..2b858c28e04 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -232,6 +232,9 @@ protected:
   /// True if SHLD based rotate is fast.
   bool HasFastSHLDRotate;
 
+  /// True if the processor has fast REP MOVS.
+  bool HasFastString;
+
   /// True if the short functions should be padded to prevent
   /// a stall when returning too early.
   bool PadShortFunctions;
@@ -472,6 +475,7 @@ public:
   bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
   bool hasFastLZCNT() const { return HasFastLZCNT; }
   bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+  bool hasFastString() const { return HasFastString; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
   bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
-- 
cgit v1.2.3