summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/InferAddressSpaces/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/Transforms/InferAddressSpaces/AMDGPU')
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll16
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll10
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll28
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll10
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll42
-rw-r--r--llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll18
6 files changed, 62 insertions, 62 deletions
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
index 67b4ccda1a1..b566c147e9b 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
@@ -45,7 +45,7 @@ define float @load_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
; CHECK-LABEL: @store_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0
-define void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
+define amdgpu_kernel void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
%tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
store float 0.0, float addrspace(1)* %tmp0
ret void
@@ -54,7 +54,7 @@ define void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
; CHECK-LABEL: @store_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0
-define void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
+define amdgpu_kernel void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
%tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
store float 0.0, float addrspace(3)* %tmp0
ret void
@@ -63,7 +63,7 @@ define void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
; CHECK-LABEL: @store_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
; CHECK-NEXT: store float 0.000000e+00, float* %tmp0
-define void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
+define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
%tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
store float 0.0, float* %tmp0
ret void
@@ -74,7 +74,7 @@ define void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
; CHECK-NEXT: %val = load i32, i32 addrspace(1)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4
; CHECK-NEXT: ret void
-define void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
+define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %tmp0, align 4
@@ -87,7 +87,7 @@ define void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace
; CHECK-NEXT: %val = load i32, i32 addrspace(3)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4
; CHECK-NEXT: ret void
-define void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
+define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %tmp0, align 4
@@ -100,7 +100,7 @@ define void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(
; CHECK-NEXT: %val = load i32, i32* %input, align 4
; CHECK-NEXT: store i32 %val, i32* %output, align 4
; CHECK-NEXT: ret void
-define void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 {
+define amdgpu_kernel void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 {
%tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %tmp0, align 4
@@ -113,7 +113,7 @@ define void @load_store_private(i32* nocapture %input, i32* nocapture %output) #
; CHECK-NEXT: %val = load i32, i32 addrspace(4)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(4)* %output, align 4
; CHECK-NEXT: ret void
-define void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 {
+define amdgpu_kernel void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 {
%val = load i32, i32 addrspace(4)* %input, align 4
store i32 %val, i32 addrspace(4)* %output, align 4
ret void
@@ -122,7 +122,7 @@ define void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4
; CHECK-LABEL: @store_addrspacecast_ptr_value(
; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
; CHECK-NEXT: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4
-define void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 {
+define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 {
%cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4
ret void
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
index aad9db63269..52067cd37bb 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
@@ -28,7 +28,7 @@
; CHECK: store float %v, float addrspace(3)* %tmp7, align 4
; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK: ret void
-define void @load_store_lds_f32(i32 %i, float %v) #0 {
+define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
bb:
%tmp = load float, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4
call void @use(float %tmp)
@@ -83,7 +83,7 @@ bb:
; CHECK-LABEL: @nested_const_expr(
; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4
-define void @nested_const_expr() #0 {
+define amdgpu_kernel void @nested_const_expr() #0 {
store i32 1, i32 addrspace(4)* bitcast (float addrspace(4)* getelementptr ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i64 0, i64 1) to i32 addrspace(4)*), align 4
ret void
}
@@ -93,7 +93,7 @@ define void @nested_const_expr() #0 {
; CHECK-NEXT: %v = load float, float addrspace(1)* %addr
; CHECK-NEXT: store float %v, float addrspace(1)* %addr
; CHECK-NEXT: ret void
-define void @rauw(float addrspace(1)* %input) #0 {
+define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 {
bb:
%generic_input = addrspacecast float addrspace(1)* %input to float addrspace(4)*
%addr = getelementptr float, float addrspace(4)* %generic_input, i64 10
@@ -117,7 +117,7 @@ bb:
; CHECK: %exit_cond = icmp eq float addrspace(3)* %i2, %end
; CHECK: br i1 %exit_cond, label %exit, label %loop
-define void @loop() #0 {
+define amdgpu_kernel void @loop() #0 {
entry:
%p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)*
%end = getelementptr float, float addrspace(4)* %p, i64 10
@@ -150,7 +150,7 @@ exit: ; preds = %loop
; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)*
; CHECK: %exit_cond = icmp eq float addrspace(4)* %0, %end
; CHECK: br i1 %exit_cond, label %exit, label %loop
-define void @loop_with_generic_bound() #0 {
+define amdgpu_kernel void @loop_with_generic_bound() #0 {
entry:
%p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)*
%end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
index afd1493fc0e..557a80f1a5d 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
@@ -2,7 +2,7 @@
; CHECK-LABEL: @memset_group_to_flat(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
+define amdgpu_kernel void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
@@ -10,7 +10,7 @@ define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
; CHECK-LABEL: @memset_global_to_flat(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
+define amdgpu_kernel void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
@@ -18,7 +18,7 @@ define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
; CHECK-LABEL: @memset_group_to_flat_no_md(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
-define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
ret void
@@ -26,7 +26,7 @@ define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size)
; CHECK-LABEL: @memset_global_to_flat_no_md(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 %size, i32 4, i1 false){{$}}
-define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false)
ret void
@@ -34,7 +34,7 @@ define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
@@ -42,7 +42,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest,
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
%cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
@@ -50,7 +50,7 @@ define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %src.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
%cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
@@ -59,7 +59,7 @@ define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %d
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)*
%cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
@@ -68,7 +68,7 @@ define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)*
; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
+define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
%cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* %cast.dest, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
@@ -76,7 +76,7 @@ define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.glo
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa.struct !7
-define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa.struct !7
ret void
@@ -84,7 +84,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
-define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
ret void
@@ -93,7 +93,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %
; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}}
-define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false)
@@ -103,14 +103,14 @@ define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrsp
; Check for iterator problems if the pointer has 2 uses in the same call
; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 addrspace(3)* %group.ptr, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
+define amdgpu_kernel void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast, i8 addrspace(4)* %cast, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
%cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
index 17997052f07..3231b6ccf1c 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
@@ -9,7 +9,7 @@
; CHECK-LABEL: @generic_address_bitcast_const(
; CHECK: %vecload1 = load <2 x double>, <2 x double> addrspace(1)* bitcast (double addrspace(1)* getelementptr inbounds ([100 x double], [100 x double] addrspace(1)* @data, i64 0, i64 4) to <2 x double> addrspace(1)*), align 8
-define void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 {
+define amdgpu_kernel void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 {
entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
@@ -39,7 +39,7 @@ declare i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)*)
; CHECK: %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)*
; CHECK: %add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2
; CHECK: %tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
-define void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 {
+define amdgpu_kernel void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)*
@@ -55,7 +55,7 @@ entry:
; CHECK: br i1
; CHECK: load float, float addrspace(4)*
; CHECK: br label
-define void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 {
+define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 {
entry:
%ptr = alloca float addrspace(4)*, align 8
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
@@ -85,7 +85,7 @@ helperFunction.exit: ; preds = %if.end.i, %entry
; CHECK-LABEL: @generic_address_opt_phi_bug9776_simple_phi_kernel(
; CHECK: phi i32 addrspace(3)*
; CHECK: store i32 %i.03, i32 addrspace(3)* %
-define void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 {
+define amdgpu_kernel void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 {
entry:
%cmp1 = icmp eq i32 %numElems, 0
br i1 %cmp1, label %for.end, label %for.body.lr.ph
@@ -110,7 +110,7 @@ for.end: ; preds = %for.body, %entry
; CHECK-LABEL: @generic_address_bug9899(
; CHECK: %vecload = load <2 x i32>, <2 x i32> addrspace(3)*
; CHECK: store <2 x i32> %tmp16, <2 x i32> addrspace(3)*
-define void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 {
+define amdgpu_kernel void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 {
entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
index bcbca16d7af..08edc20ecf9 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
@@ -18,7 +18,7 @@ define i32 addrspace(4)* @return_select_group_flat(i1 %c, i32 addrspace(3)* %gro
; CHECK-LABEL: @store_select_group_flat(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1
; CHECK: store i32 -1, i32 addrspace(3)* %select
-define void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
@@ -43,7 +43,7 @@ define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32
; CHECK: %2 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)*
; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* %2
; CHECK: store i32 -1, i32 addrspace(4)* %select
-define void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 {
+define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%cast1 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
@@ -73,7 +73,7 @@ bb:
; CHECK-LABEL: @store_select_group_flat_null(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
; CHECK: store i32 -1, i32 addrspace(3)* %select
-define void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null
store i32 -1, i32 addrspace(4)* %select
@@ -83,7 +83,7 @@ define void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0)
; CHECK-LABEL: @store_select_group_flat_null_swap(
; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
-define void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* null, i32 addrspace(4)* %cast0
store i32 -1, i32 addrspace(4)* %select
@@ -93,7 +93,7 @@ define void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.p
; CHECK-LABEL: @store_select_group_flat_undef(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef
; CHECK: store i32 -1, i32 addrspace(3)* %select
-define void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* undef
store i32 -1, i32 addrspace(4)* %select
@@ -103,7 +103,7 @@ define void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0
; CHECK-LABEL: @store_select_group_flat_undef_swap(
; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
-define void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* undef, i32 addrspace(4)* %cast0
store i32 -1, i32 addrspace(4)* %select
@@ -114,7 +114,7 @@ define void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16
; CHECK: store i32 -1, i32 addrspace(3)* %gep
-define void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null
%gep = getelementptr i32, i32 addrspace(4)* %select, i64 16
@@ -127,7 +127,7 @@ define void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.pt
; CHECK-LABEL: @store_select_group_flat_constexpr(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1
; CHECK: store i32 7, i32 addrspace(3)* %select
-define void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*)
store i32 7, i32 addrspace(4)* %select
@@ -137,7 +137,7 @@ define void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.p
; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) to i32 addrspace(3)*)
; CHECK: store i32 7, i32 addrspace(3)* %select
-define void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*)
store i32 7, i32 addrspace(4)* %select
@@ -147,7 +147,7 @@ define void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %gro
; CHECK-LABEL: @store_select_group_flat_inttoptr_group(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*)
; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select
-define void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32 addrspace(4)*)
store i32 7, i32 addrspace(4)* %select
@@ -158,7 +158,7 @@ define void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %gr
; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
; CHECK: store i32 7, i32 addrspace(4)* %select
-define void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
store i32 7, i32 addrspace(4)* %select
@@ -169,7 +169,7 @@ define void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrsp
; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %1
; CHECK: store i32 7, i32 addrspace(4)* %select
-define void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %cast0
store i32 7, i32 addrspace(4)* %select
@@ -179,7 +179,7 @@ define void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 a
; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)
; CHECK: store i32 7, i32 addrspace(4)* %select
-define void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
%select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)
store i32 7, i32 addrspace(4)* %select
ret void
@@ -187,42 +187,42 @@ define void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr(
; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
-define void @store_select_group_global_mismatch_null_null_constexpr() #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 {
store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr(
; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
-define void @store_select_group_global_mismatch_gv_null_constexpr() #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 {
store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr(
; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4
-define void @store_select_group_global_mismatch_null_gv_constexpr() #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 {
store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr(
; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
-define void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 {
store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr(
; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
-define void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 {
store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr(
; CHECK: store i32 7, i32 addrspace(3)* null
-define void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
+define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* undef to i32 addrspace(4)*)), align 4
ret void
}
@@ -233,7 +233,7 @@ define void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*)
; CHECK: store i32 7, i32 addrspace(4)* %select
-define void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
+define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
%cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
%select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*)
store i32 7, i32 addrspace(4)* %select
@@ -248,7 +248,7 @@ define void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %gro
; CHECK: %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1
; CHECK: store i32 -1, i32 addrspace(4)* %extract0
; CHECK: store i32 -2, i32 addrspace(4)* %extract1
-define void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
+define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
%cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
%cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
%select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
index d9b80e99bf0..79bf92610a8 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
@@ -5,7 +5,7 @@
; CHECK-LABEL: @volatile_load_flat_from_global(
; CHECK: load volatile i32, i32 addrspace(4)*
; CHECK: store i32 %val, i32 addrspace(1)*
-define void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
+define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
%val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
@@ -16,7 +16,7 @@ define void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input,
; CHECK-LABEL: @volatile_load_flat_from_constant(
; CHECK: load volatile i32, i32 addrspace(4)*
; CHECK: store i32 %val, i32 addrspace(1)*
-define void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
+define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(2)* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
%val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
@@ -27,7 +27,7 @@ define void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input
; CHECK-LABEL: @volatile_load_flat_from_group(
; CHECK: load volatile i32, i32 addrspace(4)*
; CHECK: store i32 %val, i32 addrspace(3)*
-define void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
+define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
%val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
@@ -38,7 +38,7 @@ define void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i
; CHECK-LABEL: @volatile_load_flat_from_private(
; CHECK: load volatile i32, i32 addrspace(4)*
; CHECK: store i32 %val, i32*
-define void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 {
+define amdgpu_kernel void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 {
%tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
%val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
@@ -49,7 +49,7 @@ define void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocaptu
; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, i32 addrspace(1)*
; CHECK: store volatile i32 %val, i32 addrspace(4)*
-define void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
+define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %tmp0, align 4
@@ -60,7 +60,7 @@ define void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i
; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, i32 addrspace(3)*
; CHECK: store volatile i32 %val, i32 addrspace(4)*
-define void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
+define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %tmp0, align 4
@@ -71,7 +71,7 @@ define void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i3
; CHECK-LABEL: @volatile_store_flat_to_private(
; CHECK: load i32, i32*
; CHECK: store volatile i32 %val, i32 addrspace(4)*
-define void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 {
+define amdgpu_kernel void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 {
%tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
%tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
%val = load i32, i32 addrspace(4)* %tmp0, align 4
@@ -119,7 +119,7 @@ define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr,
; CHECK-LABEL: @volatile_memset_group_to_flat(
; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
-define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
+define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
ret void
@@ -128,7 +128,7 @@ define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y)
; CHECK-LABEL: @volatile_memset_global_to_flat(
; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true)
-define void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
+define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true)
ret void
OpenPOWER on IntegriCloud