diff options
Diffstat (limited to 'llvm/test/Transforms/InferAddressSpaces/AMDGPU')
6 files changed, 62 insertions, 62 deletions
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll index 67b4ccda1a1..b566c147e9b 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll @@ -45,7 +45,7 @@ define float @load_private_from_flat(float addrspace(4)* %generic_scalar) #0 { ; CHECK-LABEL: @store_global_from_flat( ; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)* ; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0 -define void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 { +define amdgpu_kernel void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 { %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)* store float 0.0, float addrspace(1)* %tmp0 ret void @@ -54,7 +54,7 @@ define void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 { ; CHECK-LABEL: @store_group_from_flat( ; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)* ; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0 -define void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 { +define amdgpu_kernel void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 { %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)* store float 0.0, float addrspace(3)* %tmp0 ret void @@ -63,7 +63,7 @@ define void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 { ; CHECK-LABEL: @store_private_from_flat( ; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float* ; CHECK-NEXT: store float 0.000000e+00, float* %tmp0 -define void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 { +define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 { %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float* store float 0.0, float* %tmp0 ret void @@ -74,7 +74,7 @@ define void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 { ; CHECK-NEXT: %val = load i32, i32 addrspace(1)* %input, align 4 ; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4 ; CHECK-NEXT: ret void -define void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -87,7 +87,7 @@ define void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace ; CHECK-NEXT: %val = load i32, i32 addrspace(3)* %input, align 4 ; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4 ; CHECK-NEXT: ret void -define void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { +define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -100,7 +100,7 @@ define void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace( ; CHECK-NEXT: %val = load i32, i32* %input, align 4 ; CHECK-NEXT: store i32 %val, i32* %output, align 4 ; CHECK-NEXT: ret void -define void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 { +define amdgpu_kernel void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 { %tmp0 = addrspacecast i32* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -113,7 +113,7 @@ define void @load_store_private(i32* nocapture %input, i32* nocapture %output) # ; CHECK-NEXT: %val = load i32, i32 addrspace(4)* %input, align 4 ; CHECK-NEXT: store i32 %val, i32 addrspace(4)* %output, align 4 ; CHECK-NEXT: ret void -define void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 { +define amdgpu_kernel void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 { %val = load i32, i32 addrspace(4)* %input, align 4 store i32 %val, i32 addrspace(4)* %output, align 4 ret void @@ -122,7 +122,7 @@ define void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4 ; CHECK-LABEL: @store_addrspacecast_ptr_value( ; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* ; CHECK-NEXT: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4 -define void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 { %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4 ret void diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll index aad9db63269..52067cd37bb 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll @@ -28,7 +28,7 @@ ; CHECK: store float %v, float addrspace(3)* %tmp7, align 4 ; CHECK: call void @llvm.amdgcn.s.barrier() ; CHECK: ret void -define void @load_store_lds_f32(i32 %i, float %v) #0 { +define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 { bb: %tmp = load float, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4 call void @use(float %tmp) @@ -83,7 +83,7 @@ bb: ; CHECK-LABEL: @nested_const_expr( ; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4 -define void @nested_const_expr() #0 { +define amdgpu_kernel void @nested_const_expr() #0 { store i32 1, i32 addrspace(4)* bitcast (float addrspace(4)* getelementptr ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i64 0, i64 1) to i32 addrspace(4)*), align 4 ret void } @@ -93,7 +93,7 @@ define void @nested_const_expr() #0 { ; CHECK-NEXT: %v = load float, float addrspace(1)* %addr ; CHECK-NEXT: store float %v, float addrspace(1)* %addr ; CHECK-NEXT: ret void -define void @rauw(float addrspace(1)* %input) #0 { +define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 { bb: %generic_input = addrspacecast float addrspace(1)* %input to float addrspace(4)* %addr = getelementptr float, float addrspace(4)* %generic_input, i64 10 @@ -117,7 +117,7 @@ bb: ; CHECK: %exit_cond = icmp eq float addrspace(3)* %i2, %end ; CHECK: br i1 %exit_cond, label %exit, label %loop -define void @loop() #0 { +define amdgpu_kernel void @loop() #0 { entry: %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)* %end = getelementptr float, float addrspace(4)* %p, i64 10 @@ -150,7 +150,7 @@ exit: ; preds = %loop ; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)* ; CHECK: %exit_cond = icmp eq float addrspace(4)* %0, %end ; CHECK: br i1 %exit_cond, label %exit, label %loop -define void @loop_with_generic_bound() #0 { +define amdgpu_kernel void @loop_with_generic_bound() #0 { entry: %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)* %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index afd1493fc0e..557a80f1a5d 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -2,7 +2,7 @@ ; CHECK-LABEL: @memset_group_to_flat( ; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { +define amdgpu_kernel void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -10,7 +10,7 @@ define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { ; CHECK-LABEL: @memset_global_to_flat( ; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { +define amdgpu_kernel void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -18,7 +18,7 @@ define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { ; CHECK-LABEL: @memset_group_to_flat_no_md( ; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 %size, i32 4, i1 false){{$}} -define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 { %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false) ret void @@ -26,7 +26,7 @@ define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) ; CHECK-LABEL: @memset_global_to_flat_no_md( ; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 %size, i32 4, i1 false){{$}} -define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 { +define amdgpu_kernel void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 { %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false) ret void @@ -34,7 +34,7 @@ define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group( ; CHCK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -42,7 +42,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group( ; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 { %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -50,7 +50,7 @@ define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest. ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group( ; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %src.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 @@ -59,7 +59,7 @@ define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %d ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global( ; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)* %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 @@ -68,7 +68,7 @@ define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* ; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global( ; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 { +define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 { %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* %cast.dest, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -76,7 +76,7 @@ define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.glo ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct( ; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa.struct !7 -define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa.struct !7 ret void @@ -84,7 +84,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md( ; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} -define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) ret void @@ -93,7 +93,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* % ; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md( ; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} ; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} -define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) @@ -103,14 +103,14 @@ define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrsp ; Check for iterator problems if the pointer has 2 uses in the same call ; CHECK-LABEL: @memcpy_group_flat_to_flat_self( ; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 addrspace(3)* %group.ptr, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 { +define amdgpu_kernel void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 { %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast, i8 addrspace(4)* %cast, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group( ; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll index 17997052f07..3231b6ccf1c 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll @@ -9,7 +9,7 @@ ; CHECK-LABEL: @generic_address_bitcast_const( ; CHECK: %vecload1 = load <2 x double>, <2 x double> addrspace(1)* bitcast (double addrspace(1)* getelementptr inbounds ([100 x double], [100 x double] addrspace(1)* @data, i64 0, i64 4) to <2 x double> addrspace(1)*), align 8 -define void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 { +define amdgpu_kernel void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 { entry: %tmp1 = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = zext i32 %tmp1 to i64 @@ -39,7 +39,7 @@ declare i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)*) ; CHECK: %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)* ; CHECK: %add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2 ; CHECK: %tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4 -define void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 { +define amdgpu_kernel void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)* @@ -55,7 +55,7 @@ entry: ; CHECK: br i1 ; CHECK: load float, float addrspace(4)* ; CHECK: br label -define void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 { +define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 { entry: %ptr = alloca float addrspace(4)*, align 8 %tmp = call i32 @llvm.amdgcn.workitem.id.x() @@ -85,7 +85,7 @@ helperFunction.exit: ; preds = %if.end.i, %entry ; CHECK-LABEL: @generic_address_opt_phi_bug9776_simple_phi_kernel( ; CHECK: phi i32 addrspace(3)* ; CHECK: store i32 %i.03, i32 addrspace(3)* % -define void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 { +define amdgpu_kernel void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 { entry: %cmp1 = icmp eq i32 %numElems, 0 br i1 %cmp1, label %for.end, label %for.body.lr.ph @@ -110,7 +110,7 @@ for.end: ; preds = %for.body, %entry ; CHECK-LABEL: @generic_address_bug9899( ; CHECK: %vecload = load <2 x i32>, <2 x i32> addrspace(3)* ; CHECK: store <2 x i32> %tmp16, <2 x i32> addrspace(3)* -define void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 { +define amdgpu_kernel void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 { entry: %tmp1 = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = zext i32 %tmp1 to i64 diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll index bcbca16d7af..08edc20ecf9 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll @@ -18,7 +18,7 @@ define i32 addrspace(4)* @return_select_group_flat(i1 %c, i32 addrspace(3)* %gro ; CHECK-LABEL: @store_select_group_flat( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1 ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { +define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1 @@ -43,7 +43,7 @@ define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 ; CHECK: %2 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)* ; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* %2 ; CHECK: store i32 -1, i32 addrspace(4)* %select -define void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 { +define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %cast1 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1 @@ -73,7 +73,7 @@ bb: ; CHECK-LABEL: @store_select_group_flat_null( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null store i32 -1, i32 addrspace(4)* %select @@ -83,7 +83,7 @@ define void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) ; CHECK-LABEL: @store_select_group_flat_null_swap( ; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0 ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* null, i32 addrspace(4)* %cast0 store i32 -1, i32 addrspace(4)* %select @@ -93,7 +93,7 @@ define void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.p ; CHECK-LABEL: @store_select_group_flat_undef( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* undef store i32 -1, i32 addrspace(4)* %select @@ -103,7 +103,7 @@ define void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0 ; CHECK-LABEL: @store_select_group_flat_undef_swap( ; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0 ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* undef, i32 addrspace(4)* %cast0 store i32 -1, i32 addrspace(4)* %select @@ -114,7 +114,7 @@ define void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group. ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) ; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16 ; CHECK: store i32 -1, i32 addrspace(3)* %gep -define void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null %gep = getelementptr i32, i32 addrspace(4)* %select, i64 16 @@ -127,7 +127,7 @@ define void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.pt ; CHECK-LABEL: @store_select_group_flat_constexpr( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1 ; CHECK: store i32 7, i32 addrspace(3)* %select -define void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -137,7 +137,7 @@ define void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.p ; CHECK-LABEL: @store_select_group_flat_inttoptr_flat( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) to i32 addrspace(3)*) ; CHECK: store i32 7, i32 addrspace(3)* %select -define void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -147,7 +147,7 @@ define void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %gro ; CHECK-LABEL: @store_select_group_flat_inttoptr_group( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) ; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select -define void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -158,7 +158,7 @@ define void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %gr ; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* ; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) ; CHECK: store i32 7, i32 addrspace(4)* %select -define void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -169,7 +169,7 @@ define void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrsp ; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* ; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %1 ; CHECK: store i32 7, i32 addrspace(4)* %select -define void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %cast0 store i32 7, i32 addrspace(4)* %select @@ -179,7 +179,7 @@ define void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 a ; CHECK-LABEL: @store_select_group_global_mismatch_null_null( ; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*) ; CHECK: store i32 7, i32 addrspace(4)* %select -define void @store_select_group_global_mismatch_null_null(i1 %c) #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 { %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select ret void @@ -187,42 +187,42 @@ define void @store_select_group_global_mismatch_null_null(i1 %c) #0 { ; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr( ; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 -define void @store_select_group_global_mismatch_null_null_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr( ; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 -define void @store_select_group_global_mismatch_gv_null_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr( ; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4 -define void @store_select_group_global_mismatch_null_gv_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr( ; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 -define void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr( ; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4 -define void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr( ; CHECK: store i32 7, i32 addrspace(3)* null -define void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* undef to i32 addrspace(4)*)), align 4 ret void } @@ -233,7 +233,7 @@ define void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { ; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* ; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*) ; CHECK: store i32 7, i32 addrspace(4)* %select -define void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -248,7 +248,7 @@ define void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %gro ; CHECK: %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1 ; CHECK: store i32 -1, i32 addrspace(4)* %extract0 ; CHECK: store i32 -2, i32 addrspace(4)* %extract1 -define void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 { +define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 { %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*> %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*> %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1 diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll index d9b80e99bf0..79bf92610a8 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: @volatile_load_flat_from_global( ; CHECK: load volatile i32, i32 addrspace(4)* ; CHECK: store i32 %val, i32 addrspace(1)* -define void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 @@ -16,7 +16,7 @@ define void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, ; CHECK-LABEL: @volatile_load_flat_from_constant( ; CHECK: load volatile i32, i32 addrspace(4)* ; CHECK: store i32 %val, i32 addrspace(1)* -define void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(2)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 @@ -27,7 +27,7 @@ define void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input ; CHECK-LABEL: @volatile_load_flat_from_group( ; CHECK: load volatile i32, i32 addrspace(4)* ; CHECK: store i32 %val, i32 addrspace(3)* -define void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)* %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 @@ -38,7 +38,7 @@ define void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i ; CHECK-LABEL: @volatile_load_flat_from_private( ; CHECK: load volatile i32, i32 addrspace(4)* ; CHECK: store i32 %val, i32* -define void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 { +define amdgpu_kernel void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 { %tmp0 = addrspacecast i32* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32* %output to i32 addrspace(4)* %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 @@ -49,7 +49,7 @@ define void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocaptu ; CHECK-LABEL: @volatile_store_flat_to_global( ; CHECK: load i32, i32 addrspace(1)* ; CHECK: store volatile i32 %val, i32 addrspace(4)* -define void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -60,7 +60,7 @@ define void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i ; CHECK-LABEL: @volatile_store_flat_to_group( ; CHECK: load i32, i32 addrspace(3)* ; CHECK: store volatile i32 %val, i32 addrspace(4)* -define void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -71,7 +71,7 @@ define void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i3 ; CHECK-LABEL: @volatile_store_flat_to_private( ; CHECK: load i32, i32* ; CHECK: store volatile i32 %val, i32 addrspace(4)* -define void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 { +define amdgpu_kernel void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 { %tmp0 = addrspacecast i32* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -119,7 +119,7 @@ define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, ; CHECK-LABEL: @volatile_memset_group_to_flat( ; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* ; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true) -define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { +define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true) ret void @@ -128,7 +128,7 @@ define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) ; CHECK-LABEL: @volatile_memset_global_to_flat( ; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* ; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true) -define void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { +define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true) ret void |