From 1ce3b82dea8eb35e77974fc9d97f9a08c690c53d Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Fri, 21 Apr 2017 09:20:39 +0000 Subject: X86 memcpy: use REPMOVSB instead of REPMOVS{Q,D,W} for inline copies when the subtarget has fast strings. This has two advantages: - Speed is improved. For example, on Haswell, throughput improvements increase linearly with size from 256 to 512 bytes, after which they plateau (e.g. 1% for 260 bytes, 25% for 400 bytes, 40% for 508 bytes). - Code is much smaller (no need to handle boundaries). llvm-svn: 300957 --- llvm/lib/Target/X86/X86Subtarget.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'llvm/lib/Target/X86/X86Subtarget.h') diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index d0d88d32694..2b858c28e04 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -232,6 +232,9 @@ protected: /// True if SHLD based rotate is fast. bool HasFastSHLDRotate; + /// True if the processor has fast REP MOVS. + bool HasFastString; + /// True if the short functions should be padded to prevent /// a stall when returning too early. bool PadShortFunctions; @@ -472,6 +475,7 @@ public: bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasFastString() const { return HasFastString; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } -- cgit v1.2.3