diff options
author | Bjorn Steinbrink <bsteinbr@gmail.com> | 2014-10-16 19:43:08 +0000 |
---|---|---|
committer | Bjorn Steinbrink <bsteinbr@gmail.com> | 2014-10-16 19:43:08 +0000 |
commit | d20816fde95cb3c2467ef726790c06e8009a2653 (patch) | |
tree | d4d8f2f76be9647f098c886c8d40822c0e5fed92 /llvm/test/Transforms | |
parent | 51720673915e21fbe2756e9ec7a2b38f9b53396c (diff) | |
download | bcm5719-llvm-d20816fde95cb3c2467ef726790c06e8009a2653.tar.gz bcm5719-llvm-d20816fde95cb3c2467ef726790c06e8009a2653.zip |
Allow call slot optimization for destinations with a suitable dereferenceable attribute
Summary:
Currently, call slot optimization requires that if the destination is an
argument, the argument has the sret attribute. This is to ensure that
the memory access won't trap. In addition to sret, we can also allow the
optimization to happen for arguments that have the new dereferenceable
attribute, which gives the same guarantee.
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D5832
llvm-svn: 219950
Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r-- | llvm/test/Transforms/MemCpyOpt/callslot_deref.ll | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll new file mode 100644 index 00000000000..4d51552d015 --- /dev/null +++ b/llvm/test/Transforms/MemCpyOpt/callslot_deref.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind + +; all bytes of %dst that are touched by the memset are dereferenceable +define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) { +; CHECK-LABEL: @must_remove_memcpy( +; CHECK: call void @llvm.memset.p0i8.i64 +; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64 + %src = alloca [4096 x i8], align 1 + %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0 + call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2 + ret void +} + +; memset touches more bytes than those guaranteed to be dereferenceable +define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) { +; CHECK-LABEL: @must_not_remove_memcpy( +; CHECK: call void @llvm.memset.p0i8.i64 +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64 + %src = alloca [4096 x i8], align 1 + %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0 + call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2 + ret void +} |