diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-01-31 01:56:57 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-01-31 01:56:57 +0000 |
commit | 6d5a8d48fd762153c4f3ba8c0e9f9d35ce84e6d6 (patch) | |
tree | b1e458b9aa0059f81546e73129bf30b4999e3d47 /llvm/test/Transforms/InferAddressSpaces/AMDGPU | |
parent | f63f58a28f57891a2efc933bdd8c631013daa684 (diff) | |
download | bcm5719-llvm-6d5a8d48fd762153c4f3ba8c0e9f9d35ce84e6d6.tar.gz bcm5719-llvm-6d5a8d48fd762153c4f3ba8c0e9f9d35ce84e6d6.zip |
InferAddressSpaces: Support memory intrinsics
llvm-svn: 293587
Diffstat (limited to 'llvm/test/Transforms/InferAddressSpaces/AMDGPU')
3 files changed, 249 insertions, 1 deletions
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll new file mode 100644 index 00000000000..36ec424267c --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll @@ -0,0 +1,92 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s + +; CHECK-LABEL: @objectsize_group_to_flat_i32( +; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true) +define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 { + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* + %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true) + ret i32 %val +} + +; CHECK-LABEL: @objectsize_global_to_flat_i64( +; CHECK: %val = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %global.ptr, i1 true) +define i64 @objectsize_global_to_flat_i64(i8 addrspace(3)* %global.ptr) #0 { + %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8 addrspace(4)* + %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true) + ret i64 %val +} + +; CHECK-LABEL: @atomicinc_global_to_flat_i32( +; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y) +define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 { + %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)* + %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y) + ret i32 %ret +} + +; CHECK-LABEL: @atomicinc_group_to_flat_i32( +; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y) +define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 { + %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)* + %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y) + ret i32 %ret +} + +; CHECK-LABEL: @atomicinc_global_to_flat_i64( +; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y) +define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 { + %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)* + %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y) + ret i64 %ret +} + +; CHECK-LABEL: @atomicinc_group_to_flat_i64( +; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y) +define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 { + %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* + %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y) + ret i64 %ret +} + +; CHECK-LABEL: @atomicdec_global_to_flat_i32( +; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val) +define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 { + %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)* + %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val) + ret i32 %ret +} + +; CHECK-LABEL: @atomicdec_group_to_flat_i32( +; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val) +define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 { + %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)* + %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val) + ret i32 %ret +} + +; CHECK-LABEL: @atomicdec_global_to_flat_i64( +; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y) +define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 { + %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)* + %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y) + ret i64 %ret +} + +; CHECK-LABEL: @atomicdec_group_to_flat_i64( +; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y) +define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 { + %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* + %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y) + ret i64 %ret +} + +declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1) #1 +declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1) #1 +declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind argmemonly } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll new file mode 100644 index 00000000000..afd1493fc0e --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -0,0 +1,134 @@ +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s + +; CHECK-LABEL: @memset_group_to_flat( +; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* + call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} + +; CHECK-LABEL: @memset_global_to_flat( +; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { + %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* + call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} + +; CHECK-LABEL: @memset_group_to_flat_no_md( +; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 %size, i32 4, i1 false){{$}} +define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 { + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* + call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false) + ret void +} + +; CHECK-LABEL: @memset_global_to_flat_no_md( +; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 %size, i32 4, i1 false){{$}} +define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 { + %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* + call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false) + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group( +; CHCK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group( +; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 { + %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group( +; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %src.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* + %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global( +; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)* + %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} + +; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global( +; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 { + %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* %cast.dest, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct( +; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa.struct !7 +define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa.struct !7 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md( +; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} +define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) + ret void +} + +; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md( +; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} +; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} +define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) + ret void +} + +; Check for iterator problems if the pointer has 2 uses in the same call +; CHECK-LABEL: @memcpy_group_flat_to_flat_self( +; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 addrspace(3)* %group.ptr, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 { + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* + call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast, i8 addrspace(4)* %cast, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} +; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group( +; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* + call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + ret void +} + +declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1 +declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i32, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } + +!0 = !{!1, !1, i64 0} +!1 = !{!"A", !2} +!2 = !{!"tbaa root"} +!3 = !{!"B", !2} +!4 = !{!5} +!5 = distinct !{!5, !6, !"some scope"} +!6 = distinct !{!6, !"some domain"} +!7 = !{i64 0, i64 8, null}
\ No newline at end of file diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll index f32d65b66ae..d9b80e99bf0 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll @@ -115,4 +115,26 @@ define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, ret { i32, i1 } %ret } -attributes #0 = { nounwind }
\ No newline at end of file +; FIXME: Shouldn't be losing names +; CHECK-LABEL: @volatile_memset_group_to_flat( +; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* +; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true) +define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* + call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true) + ret void +} + +; CHECK-LABEL: @volatile_memset_global_to_flat( +; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* +; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true) +define void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { + %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* + call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true) + ret void +} + +declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i32, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } |