From 1ce3b82dea8eb35e77974fc9d97f9a08c690c53d Mon Sep 17 00:00:00 2001 From: Clement Courbet Date: Fri, 21 Apr 2017 09:20:39 +0000 Subject: X86 memcpy: use REPMOVSB instead of REPMOVS{Q,D,W} for inline copies when the subtarget has fast strings. This has two advantages: - Speed is improved. For example, on Haswell throughput improvements increase linearly with size from 256 to 512 bytes, after which they plateau: (e.g. 1% for 260 bytes, 25% for 400 bytes, 40% for 508 bytes). - Code is much smaller (no need to handle boundaries). llvm-svn: 300957 --- llvm/test/CodeGen/X86/memcpy-struct-by-value.ll | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 llvm/test/CodeGen/X86/memcpy-struct-by-value.ll (limited to 'llvm/test/CodeGen') diff --git a/llvm/test/CodeGen/X86/memcpy-struct-by-value.ll b/llvm/test/CodeGen/X86/memcpy-struct-by-value.ll new file mode 100644 index 00000000000..4bb022e9332 --- /dev/null +++ b/llvm/test/CodeGen/X86/memcpy-struct-by-value.ll @@ -0,0 +1,15 @@ +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=-fast-string < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+fast-string < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST + +%struct.large = type { [4096 x i8] } + +declare void @foo(%struct.large* align 8 byval) nounwind + +define void @test1(%struct.large* nocapture %x) nounwind { + call void @foo(%struct.large* align 8 byval %x) + ret void + +; ALL-LABEL: test1: +; NOFAST: rep;movsq +; FAST: rep;movsb +} -- cgit v1.2.3