diff options
| author | Nikita Popov <nikita.ppv@gmail.com> | 2018-12-07 21:16:58 +0000 |
|---|---|---|
| committer | Nikita Popov <nikita.ppv@gmail.com> | 2018-12-07 21:16:58 +0000 |
| commit | 94b8e2ea4ec9246434181e152558cbc2c1c3c7d8 (patch) | |
| tree | b217ad259cdcfc0dece662c7c9fa4dbe06baf8ca /llvm/test/Transforms/MemCpyOpt | |
| parent | 4ca00df57189d95b282cfc6296a51bc1058e670a (diff) | |
| download | bcm5719-llvm-94b8e2ea4ec9246434181e152558cbc2c1c3c7d8.tar.gz bcm5719-llvm-94b8e2ea4ec9246434181e152558cbc2c1c3c7d8.zip | |
[MemCpyOpt] memset->memcpy forwarding with undef tail
Currently memcpyopt optimizes cases like

    memset(a, byte, N);
    memcpy(b, a, M);

to

    memset(a, byte, N);
    memset(b, byte, M);

if M <= N. Often this allows further simplifications down the line,
which drop the first memset entirely.
This patch extends this optimization for the case where M > N, but we
know that the bytes a[N..M] are undef due to alloca/lifetime.start.
This situation arises relatively often for Rust code, because Rust does
not initialize trailing structure padding and loves to insert redundant
memcpys. This also fixes https://bugs.llvm.org/show_bug.cgi?id=39844.
For the implementation, I'm reusing a bit of code for a similar existing
optimization (direct memcpy of undef). I've also added memset support to
MemDepAnalysis GetLocation -- getPointerDependencyFrom could be used
instead, but it seems to make more sense to add this to GetLocation and
thus make the computation cacheable.
Differential Revision: https://reviews.llvm.org/D55120
llvm-svn: 348645
Diffstat (limited to 'llvm/test/Transforms/MemCpyOpt')
| -rw-r--r-- | llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
index 39538be7224..7495400d16a 100644
--- a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
@@ -12,7 +12,7 @@ define void @test_alloca(i8* %result) {
 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT: ret void
 ;
 %a = alloca %T, align 8
@@ -28,7 +28,7 @@ define void @test_alloca_with_lifetimes(i8* %result) {
 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
 ; CHECK-NEXT: ret void
 ;
@@ -46,7 +46,7 @@ define void @test_malloc_with_lifetimes(i8* %result) {
 ; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
 ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
 ; CHECK-NEXT: call void @free(i8* [[A]])
 ; CHECK-NEXT: ret void
@@ -98,7 +98,7 @@ define void @test_volatile_memset(i8* %result) {
 ; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
 ; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
 ; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[RESULT:%.*]], i8 0, i64 12, i1 false)
 ; CHECK-NEXT: ret void
 ;
 %a = alloca %T, align 8

