summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/Transforms/MemCpyOpt
diff options
context:
space:
mode:
author: Nikita Popov <nikita.ppv@gmail.com> 2018-12-07 21:16:58 +0000
committer: Nikita Popov <nikita.ppv@gmail.com> 2018-12-07 21:16:58 +0000
commit: 94b8e2ea4ec9246434181e152558cbc2c1c3c7d8 (patch)
tree: b217ad259cdcfc0dece662c7c9fa4dbe06baf8ca /llvm/test/Transforms/MemCpyOpt
parent: 4ca00df57189d95b282cfc6296a51bc1058e670a (diff)
download: bcm5719-llvm-94b8e2ea4ec9246434181e152558cbc2c1c3c7d8.tar.gz
bcm5719-llvm-94b8e2ea4ec9246434181e152558cbc2c1c3c7d8.zip
[MemCpyOpt] memset->memcpy forwarding with undef tail
Currently memcpyopt optimizes cases like

    memset(a, byte, N);
    memcpy(b, a, M);

to

    memset(a, byte, N);
    memset(b, byte, M);

if M <= N. Often this allows further simplifications down the line, which drop the first memset entirely.

This patch extends this optimization to the case where M > N, but we know that the bytes a[N..M] are undef due to alloca/lifetime.start. This situation arises relatively often for Rust code, because Rust does not initialize trailing structure padding and loves to insert redundant memcpys. This also fixes https://bugs.llvm.org/show_bug.cgi?id=39844.

For the implementation, I'm reusing a bit of code from a similar existing optimization (direct memcpy of undef). I've also added memset support to MemDepAnalysis GetLocation. getPointerDependencyFrom could be used instead, but it seems to make more sense to add this to GetLocation and thus make the computation cacheable.

Differential Revision: https://reviews.llvm.org/D55120

llvm-svn: 348645
Diffstat (limited to 'llvm/test/Transforms/MemCpyOpt')
-rw-r--r-- llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
index 39538be7224..7495400d16a 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
@@ -12,7 +12,7 @@ define void @test_alloca(i8* %result) {
; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT: ret void
;
%a = alloca %T, align 8
@@ -28,7 +28,7 @@ define void @test_alloca_with_lifetimes(i8* %result) {
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
; CHECK-NEXT: ret void
;
@@ -46,7 +46,7 @@ define void @test_malloc_with_lifetimes(i8* %result) {
; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
; CHECK-NEXT: call void @free(i8* [[A]])
; CHECK-NEXT: ret void
@@ -98,7 +98,7 @@ define void @test_volatile_memset(i8* %result) {
; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
; CHECK-NEXT: ret void
;
%a = alloca %T, align 8
OpenPOWER on IntegriCloud