summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNikita Popov <nikita.ppv@gmail.com>2018-12-07 21:16:52 +0000
committerNikita Popov <nikita.ppv@gmail.com>2018-12-07 21:16:52 +0000
commit4ca00df57189d95b282cfc6296a51bc1058e670a (patch)
treef9537488239d724a6a31be3cf155144f7a9c91cd
parentfab7d27f0ebf5efff3514d1bec91551d00deab34 (diff)
downloadbcm5719-llvm-4ca00df57189d95b282cfc6296a51bc1058e670a.tar.gz
bcm5719-llvm-4ca00df57189d95b282cfc6296a51bc1058e670a.zip
[MemCpyOpt] Add tests for memset->memcpy forwaring with undef tail; NFC
These are baseline tests for D55120. llvm-svn: 348644
-rw-r--r--llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll152
1 files changed, 152 insertions, 0 deletions
diff --git a/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
new file mode 100644
index 00000000000..39538be7224
--- /dev/null
+++ b/llvm/test/Transforms/MemCpyOpt/memset-memcpy-oversized.ll
@@ -0,0 +1,152 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -memcpyopt -S %s | FileCheck %s
+
+; memset -> memcpy forwarding, if memcpy is larger than memset, but trailing
+; bytes are known to be undef.
+
+
+%T = type { i64, i32, i32 }
+
+define void @test_alloca(i8* %result) {
+; CHECK-LABEL: @test_alloca(
+; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %a = alloca %T, align 8
+ %b = bitcast %T* %a to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+ ret void
+}
+
+define void @test_alloca_with_lifetimes(i8* %result) {
+; CHECK-LABEL: @test_alloca_with_lifetimes(
+; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[B]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[B]])
+; CHECK-NEXT: ret void
+;
+ %a = alloca %T, align 8
+ %b = bitcast %T* %a to i8*
+ call void @llvm.lifetime.start.p0i8(i64 16, i8* %b)
+ call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+ call void @llvm.lifetime.end.p0i8(i64 16, i8* %b)
+ ret void
+}
+
+define void @test_malloc_with_lifetimes(i8* %result) {
+; CHECK-LABEL: @test_malloc_with_lifetimes(
+; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[A]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[A]])
+; CHECK-NEXT: call void @free(i8* [[A]])
+; CHECK-NEXT: ret void
+;
+ %a = call i8* @malloc(i64 16)
+ call void @llvm.lifetime.start.p0i8(i64 16, i8* %a)
+ call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
+ call void @llvm.lifetime.end.p0i8(i64 16, i8* %a)
+ call void @free(i8* %a)
+ ret void
+}
+
+; memcpy size is larger than lifetime, don't optimize.
+define void @test_copy_larger_than_lifetime_size(i8* %result) {
+; CHECK-LABEL: @test_copy_larger_than_lifetime_size(
+; CHECK-NEXT: [[A:%.*]] = call i8* @malloc(i64 16)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 12, i8* [[A]])
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[A]], i8 0, i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[A]], i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 12, i8* [[A]])
+; CHECK-NEXT: call void @free(i8* [[A]])
+; CHECK-NEXT: ret void
+;
+ %a = call i8* @malloc(i64 16)
+ call void @llvm.lifetime.start.p0i8(i64 12, i8* %a)
+ call void @llvm.memset.p0i8.i64(i8* align 8 %a, i8 0, i64 12, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %a, i64 16, i1 false)
+ call void @llvm.lifetime.end.p0i8(i64 12, i8* %a)
+ call void @free(i8* %a)
+ ret void
+}
+
+; The trailing bytes are not known to be undef, we can't ignore them.
+define void @test_not_undef_memory(i8* %result, i8* %input) {
+; CHECK-LABEL: @test_not_undef_memory(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[INPUT:%.*]], i8 0, i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[INPUT]], i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memset.p0i8.i64(i8* align 8 %input, i8 0, i64 12, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %input, i64 16, i1 false)
+ ret void
+}
+
+; Memset is volatile, memcpy is not. Can be optimized.
+define void @test_volatile_memset(i8* %result) {
+; CHECK-LABEL: @test_volatile_memset(
+; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 true)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %a = alloca %T, align 8
+ %b = bitcast %T* %a to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 true)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+ ret void
+}
+
+; Memcpy is volatile, memset is not. Cannot be optimized.
+define void @test_volatile_memcpy(i8* %result) {
+; CHECK-LABEL: @test_volatile_memcpy(
+; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 true)
+; CHECK-NEXT: ret void
+;
+ %a = alloca %T, align 8
+ %b = bitcast %T* %a to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 true)
+ ret void
+}
+
+; Write between memset and memcpy, can't optimize.
+define void @test_write_between(i8* %result) {
+; CHECK-LABEL: @test_write_between(
+; CHECK-NEXT: [[A:%.*]] = alloca [[T:%.*]], align 8
+; CHECK-NEXT: [[B:%.*]] = bitcast %T* [[A]] to i8*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[B]], i8 0, i64 12, i1 false)
+; CHECK-NEXT: store i8 -1, i8* [[B]]
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[RESULT:%.*]], i8* align 8 [[B]], i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ %a = alloca %T, align 8
+ %b = bitcast %T* %a to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 8 %b, i8 0, i64 12, i1 false)
+ store i8 -1, i8* %b
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %result, i8* align 8 %b, i64 16, i1 false)
+ ret void
+}
+
+declare i8* @malloc(i64)
+declare void @free(i8*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
OpenPOWER on IntegriCloud