diff options
Diffstat (limited to 'llvm/test/Transforms')
35 files changed, 214 insertions, 214 deletions
diff --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll index 6cec253bbf9..2bcb3a9d1e3 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll @@ -5,7 +5,7 @@ ; ASC-NOT: ptrtoint ; ASC-NOT: inttoptr -define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 { +define amdgpu_kernel void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 { bb: %tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16 %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/llvm/test/Transforms/IndVarSimplify/AMDGPU/no-widen-to-i64.ll b/llvm/test/Transforms/IndVarSimplify/AMDGPU/no-widen-to-i64.ll index aa4fb8e68eb..36c7bd9c5ec 100644 --- a/llvm/test/Transforms/IndVarSimplify/AMDGPU/no-widen-to-i64.ll +++ b/llvm/test/Transforms/IndVarSimplify/AMDGPU/no-widen-to-i64.ll @@ -14,7 +14,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK-LABEL: @indvar_32_bit( ; CHECK-NOT: sext i32 ; CHECK: phi i32 -define void @indvar_32_bit(i32 %n, i32* nocapture %output) { +define amdgpu_kernel void @indvar_32_bit(i32 %n, i32* nocapture %output) { entry: %cmp5 = icmp sgt i32 %n, 0 br i1 %cmp5, label %for.body.preheader, label %for.end @@ -46,7 +46,7 @@ for.end: ; preds = %for.end.loopexit, % ; CHECK-NOT: ashr i64 ; CHECK-NOT: mul nsw i64 ; CHECK-NOT: add nsw i64 -define void @no_promote_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { +define amdgpu_kernel void @no_promote_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { entry: br label %for.body @@ -72,7 +72,7 @@ for.end: ; be legalized anyway. ; CHECK-LABEL: @indvar_48_bit( -define void @indvar_48_bit(i48 %n, i48* nocapture %output) { +define amdgpu_kernel void @indvar_48_bit(i48 %n, i48* nocapture %output) { entry: %cmp5 = icmp sgt i48 %n, 0 br i1 %cmp5, label %for.body.preheader, label %for.end diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll index 67b4ccda1a1..b566c147e9b 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll @@ -45,7 +45,7 @@ define float @load_private_from_flat(float addrspace(4)* %generic_scalar) #0 { ; CHECK-LABEL: @store_global_from_flat( ; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)* ; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0 -define void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 { +define amdgpu_kernel void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 { %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)* store float 0.0, float addrspace(1)* %tmp0 ret void @@ -54,7 +54,7 @@ define void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 { ; CHECK-LABEL: @store_group_from_flat( ; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)* ; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0 -define void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 { +define amdgpu_kernel void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 { %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)* store float 0.0, float addrspace(3)* %tmp0 ret void @@ -63,7 +63,7 @@ define void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 { ; CHECK-LABEL: @store_private_from_flat( ; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float* ; CHECK-NEXT: store float 0.000000e+00, float* %tmp0 -define void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 { +define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 { %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float* store float 0.0, float* %tmp0 ret void @@ -74,7 +74,7 @@ define void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 { ; CHECK-NEXT: %val = load i32, i32 addrspace(1)* %input, align 4 ; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4 ; CHECK-NEXT: ret void -define void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -87,7 +87,7 @@ define void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace ; CHECK-NEXT: %val = load i32, i32 addrspace(3)* %input, align 4 ; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4 ; CHECK-NEXT: ret void -define void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { +define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -100,7 +100,7 @@ define void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace( ; CHECK-NEXT: %val = load i32, i32* %input, align 4 ; CHECK-NEXT: store i32 %val, i32* %output, align 4 ; CHECK-NEXT: ret void -define void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 { +define amdgpu_kernel void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 { %tmp0 = addrspacecast i32* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -113,7 +113,7 @@ define void @load_store_private(i32* nocapture %input, i32* nocapture %output) # ; CHECK-NEXT: %val = load i32, i32 addrspace(4)* %input, align 4 ; CHECK-NEXT: store i32 %val, i32 addrspace(4)* %output, align 4 ; CHECK-NEXT: ret void -define void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 { +define amdgpu_kernel void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 { %val = load i32, i32 addrspace(4)* %input, align 4 store i32 %val, i32 addrspace(4)* %output, align 4 ret void @@ -122,7 +122,7 @@ define void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4 ; CHECK-LABEL: @store_addrspacecast_ptr_value( ; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* ; CHECK-NEXT: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4 -define void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 { %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4 ret void diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll index aad9db63269..52067cd37bb 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll @@ -28,7 +28,7 @@ ; CHECK: store float %v, float addrspace(3)* %tmp7, align 4 ; CHECK: call void @llvm.amdgcn.s.barrier() ; CHECK: ret void -define void @load_store_lds_f32(i32 %i, float %v) #0 { +define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 { bb: %tmp = load float, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4 call void @use(float %tmp) @@ -83,7 +83,7 @@ bb: ; CHECK-LABEL: @nested_const_expr( ; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4 -define void @nested_const_expr() #0 { +define amdgpu_kernel void @nested_const_expr() #0 { store i32 1, i32 addrspace(4)* bitcast (float addrspace(4)* getelementptr ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i64 0, i64 1) to i32 addrspace(4)*), align 4 ret void } @@ -93,7 +93,7 @@ define void @nested_const_expr() #0 { ; CHECK-NEXT: %v = load float, float addrspace(1)* %addr ; CHECK-NEXT: store float %v, float addrspace(1)* %addr ; CHECK-NEXT: ret void -define void @rauw(float addrspace(1)* %input) #0 { +define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 { bb: %generic_input = addrspacecast float addrspace(1)* %input to float addrspace(4)* %addr = getelementptr float, float addrspace(4)* %generic_input, i64 10 @@ -117,7 +117,7 @@ bb: ; CHECK: %exit_cond = icmp eq float addrspace(3)* %i2, %end ; CHECK: br i1 %exit_cond, label %exit, label %loop -define void @loop() #0 { +define amdgpu_kernel void @loop() #0 { entry: %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)* %end = getelementptr float, float addrspace(4)* %p, i64 10 @@ -150,7 +150,7 @@ exit: ; preds = %loop ; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)* ; CHECK: %exit_cond = icmp eq float addrspace(4)* %0, %end ; CHECK: br i1 %exit_cond, label %exit, label %loop -define void @loop_with_generic_bound() #0 { +define amdgpu_kernel void @loop_with_generic_bound() #0 { entry: %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)* %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index afd1493fc0e..557a80f1a5d 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -2,7 +2,7 @@ ; CHECK-LABEL: @memset_group_to_flat( ; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { +define amdgpu_kernel void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -10,7 +10,7 @@ define void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { ; CHECK-LABEL: @memset_global_to_flat( ; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { +define amdgpu_kernel void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -18,7 +18,7 @@ define void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { ; CHECK-LABEL: @memset_group_to_flat_no_md( ; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 4, i64 %size, i32 4, i1 false){{$}} -define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 { %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false) ret void @@ -26,7 +26,7 @@ define void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) ; CHECK-LABEL: @memset_global_to_flat_no_md( ; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %global.ptr, i8 4, i64 %size, i32 4, i1 false){{$}} -define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 { +define amdgpu_kernel void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 { %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 %size, i32 4, i1 false) ret void @@ -34,7 +34,7 @@ define void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group( ; CHCK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -42,7 +42,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group( ; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 { %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %src.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -50,7 +50,7 @@ define void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest. ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group( ; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %src.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 @@ -59,7 +59,7 @@ define void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %d ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global( ; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)* %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast.dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 @@ -68,7 +68,7 @@ define void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* ; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global( ; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 { +define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 { %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* %cast.dest, i8 addrspace(3)* %src.group.ptr, i32 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void @@ -76,7 +76,7 @@ define void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.glo ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct( ; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa.struct !7 -define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa.struct !7 ret void @@ -84,7 +84,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md( ; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} -define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) ret void @@ -93,7 +93,7 @@ define void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* % ; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md( ; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} ; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false){{$}} -define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest0, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %dest1, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false) @@ -103,14 +103,14 @@ define void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrsp ; Check for iterator problems if the pointer has 2 uses in the same call ; CHECK-LABEL: @memcpy_group_flat_to_flat_self( ; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* %group.ptr, i8 addrspace(3)* %group.ptr, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 { +define amdgpu_kernel void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 { %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* %cast, i8 addrspace(4)* %cast, i64 32, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group( ; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { +define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* %dest, i8 addrspace(4)* %cast.src, i64 %size, i32 4, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll index 17997052f07..3231b6ccf1c 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll @@ -9,7 +9,7 @@ ; CHECK-LABEL: @generic_address_bitcast_const( ; CHECK: %vecload1 = load <2 x double>, <2 x double> addrspace(1)* bitcast (double addrspace(1)* getelementptr inbounds ([100 x double], [100 x double] addrspace(1)* @data, i64 0, i64 4) to <2 x double> addrspace(1)*), align 8 -define void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 { +define amdgpu_kernel void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 { entry: %tmp1 = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = zext i32 %tmp1 to i64 @@ -39,7 +39,7 @@ declare i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)*) ; CHECK: %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)* ; CHECK: %add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2 ; CHECK: %tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4 -define void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 { +define amdgpu_kernel void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 { entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)* @@ -55,7 +55,7 @@ entry: ; CHECK: br i1 ; CHECK: load float, float addrspace(4)* ; CHECK: br label -define void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 { +define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 { entry: %ptr = alloca float addrspace(4)*, align 8 %tmp = call i32 @llvm.amdgcn.workitem.id.x() @@ -85,7 +85,7 @@ helperFunction.exit: ; preds = %if.end.i, %entry ; CHECK-LABEL: @generic_address_opt_phi_bug9776_simple_phi_kernel( ; CHECK: phi i32 addrspace(3)* ; CHECK: store i32 %i.03, i32 addrspace(3)* % -define void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 { +define amdgpu_kernel void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 { entry: %cmp1 = icmp eq i32 %numElems, 0 br i1 %cmp1, label %for.end, label %for.body.lr.ph @@ -110,7 +110,7 @@ for.end: ; preds = %for.body, %entry ; CHECK-LABEL: @generic_address_bug9899( ; CHECK: %vecload = load <2 x i32>, <2 x i32> addrspace(3)* ; CHECK: store <2 x i32> %tmp16, <2 x i32> addrspace(3)* -define void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 { +define amdgpu_kernel void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 { entry: %tmp1 = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = zext i32 %tmp1 to i64 diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll index bcbca16d7af..08edc20ecf9 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll @@ -18,7 +18,7 @@ define i32 addrspace(4)* @return_select_group_flat(i1 %c, i32 addrspace(3)* %gro ; CHECK-LABEL: @store_select_group_flat( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1 ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { +define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1 @@ -43,7 +43,7 @@ define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 ; CHECK: %2 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)* ; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* %2 ; CHECK: store i32 -1, i32 addrspace(4)* %select -define void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 { +define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %cast1 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1 @@ -73,7 +73,7 @@ bb: ; CHECK-LABEL: @store_select_group_flat_null( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null store i32 -1, i32 addrspace(4)* %select @@ -83,7 +83,7 @@ define void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) ; CHECK-LABEL: @store_select_group_flat_null_swap( ; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0 ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* null, i32 addrspace(4)* %cast0 store i32 -1, i32 addrspace(4)* %select @@ -93,7 +93,7 @@ define void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.p ; CHECK-LABEL: @store_select_group_flat_undef( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* undef store i32 -1, i32 addrspace(4)* %select @@ -103,7 +103,7 @@ define void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0 ; CHECK-LABEL: @store_select_group_flat_undef_swap( ; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0 ; CHECK: store i32 -1, i32 addrspace(3)* %select -define void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* undef, i32 addrspace(4)* %cast0 store i32 -1, i32 addrspace(4)* %select @@ -114,7 +114,7 @@ define void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group. ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) ; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16 ; CHECK: store i32 -1, i32 addrspace(3)* %gep -define void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null %gep = getelementptr i32, i32 addrspace(4)* %select, i64 16 @@ -127,7 +127,7 @@ define void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.pt ; CHECK-LABEL: @store_select_group_flat_constexpr( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1 ; CHECK: store i32 7, i32 addrspace(3)* %select -define void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -137,7 +137,7 @@ define void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.p ; CHECK-LABEL: @store_select_group_flat_inttoptr_flat( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) to i32 addrspace(3)*) ; CHECK: store i32 7, i32 addrspace(3)* %select -define void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -147,7 +147,7 @@ define void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %gro ; CHECK-LABEL: @store_select_group_flat_inttoptr_group( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) ; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select -define void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -158,7 +158,7 @@ define void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %gr ; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* ; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) ; CHECK: store i32 7, i32 addrspace(4)* %select -define void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -169,7 +169,7 @@ define void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrsp ; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* ; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %1 ; CHECK: store i32 7, i32 addrspace(4)* %select -define void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %cast0 store i32 7, i32 addrspace(4)* %select @@ -179,7 +179,7 @@ define void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 a ; CHECK-LABEL: @store_select_group_global_mismatch_null_null( ; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*) ; CHECK: store i32 7, i32 addrspace(4)* %select -define void @store_select_group_global_mismatch_null_null(i1 %c) #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 { %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select ret void @@ -187,42 +187,42 @@ define void @store_select_group_global_mismatch_null_null(i1 %c) #0 { ; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr( ; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 -define void @store_select_group_global_mismatch_null_null_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr( ; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 -define void @store_select_group_global_mismatch_gv_null_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr( ; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4 -define void @store_select_group_global_mismatch_null_gv_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr( ; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 -define void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr( ; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4 -define void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr( ; CHECK: store i32 7, i32 addrspace(3)* null -define void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { +define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* undef to i32 addrspace(4)*)), align 4 ret void } @@ -233,7 +233,7 @@ define void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { ; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* ; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*) ; CHECK: store i32 7, i32 addrspace(4)* %select -define void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { +define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*) store i32 7, i32 addrspace(4)* %select @@ -248,7 +248,7 @@ define void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %gro ; CHECK: %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1 ; CHECK: store i32 -1, i32 addrspace(4)* %extract0 ; CHECK: store i32 -2, i32 addrspace(4)* %extract1 -define void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 { +define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 { %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*> %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*> %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1 diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll index d9b80e99bf0..79bf92610a8 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: @volatile_load_flat_from_global( ; CHECK: load volatile i32, i32 addrspace(4)* ; CHECK: store i32 %val, i32 addrspace(1)* -define void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 @@ -16,7 +16,7 @@ define void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, ; CHECK-LABEL: @volatile_load_flat_from_constant( ; CHECK: load volatile i32, i32 addrspace(4)* ; CHECK: store i32 %val, i32 addrspace(1)* -define void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(2)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 @@ -27,7 +27,7 @@ define void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input ; CHECK-LABEL: @volatile_load_flat_from_group( ; CHECK: load volatile i32, i32 addrspace(4)* ; CHECK: store i32 %val, i32 addrspace(3)* -define void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)* %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 @@ -38,7 +38,7 @@ define void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i ; CHECK-LABEL: @volatile_load_flat_from_private( ; CHECK: load volatile i32, i32 addrspace(4)* ; CHECK: store i32 %val, i32* -define void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 { +define amdgpu_kernel void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 { %tmp0 = addrspacecast i32* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32* %output to i32 addrspace(4)* %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 @@ -49,7 +49,7 @@ define void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocaptu ; CHECK-LABEL: @volatile_store_flat_to_global( ; CHECK: load i32, i32 addrspace(1)* ; CHECK: store volatile i32 %val, i32 addrspace(4)* -define void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -60,7 +60,7 @@ define void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i ; CHECK-LABEL: @volatile_store_flat_to_group( ; CHECK: load i32, i32 addrspace(3)* ; CHECK: store volatile i32 %val, i32 addrspace(4)* -define void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { +define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -71,7 +71,7 @@ define void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i3 ; CHECK-LABEL: @volatile_store_flat_to_private( ; CHECK: load i32, i32* ; CHECK: store volatile i32 %val, i32 addrspace(4)* -define void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 { +define amdgpu_kernel void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 { %tmp0 = addrspacecast i32* %input to i32 addrspace(4)* %tmp1 = addrspacecast i32* %output to i32 addrspace(4)* %val = load i32, i32 addrspace(4)* %tmp0, align 4 @@ -119,7 +119,7 @@ define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, ; CHECK-LABEL: @volatile_memset_group_to_flat( ; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* ; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true) -define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { +define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true) ret void @@ -128,7 +128,7 @@ define void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) ; CHECK-LABEL: @volatile_memset_global_to_flat( ; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* ; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %1, i8 4, i64 32, i32 4, i1 true) -define void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { +define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* call void @llvm.memset.p4i8.i64(i8 addrspace(4)* %cast, i8 4, i64 32, i32 4, i1 true) ret void diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll index e6904ee50bc..4b2dab47a20 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll @@ -15,7 +15,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; NOSCOPE: load float ; NOSCOPE: store float ; NOSCOPE: store float -define void @vectorize_alias_scope(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { +define amdgpu_kernel void @vectorize_alias_scope(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { entry: %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 store float 0.0, float addrspace(1)* %a, align 4, !noalias !0 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll index d8f72a8e1df..368dc6ab361 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll @@ -10,7 +10,7 @@ target triple = "amdgcn--" ; ALIGNED: load i8, i8* %ptr0, align 1{{$}} ; ALIGNED: load i8, i8* %ptr1, align 1{{$}} -define void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i8], align 1 %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset %val0 = load i8, i8* %ptr0, align 1 @@ -27,7 +27,7 @@ define void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %o ; ALIGNED: load i16, i16* %ptr0, align 1{{$}} ; ALIGNED: load i16, i16* %ptr1, align 1{{$}} -define void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i16], align 1 %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset %val0 = load i16, i16* %ptr0, align 1 @@ -47,7 +47,7 @@ define void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 ; ALIGNED: load i32, i32* %ptr0, align 1 ; ALIGNED: load i32, i32* %ptr1, align 1 -define void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i32], align 1 %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset %val0 = load i32, i32* %ptr0, align 1 @@ -68,7 +68,7 @@ define void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 ; FIXME: Should change alignment ; ALIGNED: load i32 ; ALIGNED: load i32 -define void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i32], align 16 %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset %val0 = load i32, i32* %ptr0, align 1 @@ -85,7 +85,7 @@ define void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias % ; ALIGNED: store i8 9, i8* %ptr0, align 1{{$}} ; ALIGNED: store i8 10, i8* %ptr1, align 1{{$}} -define void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i8], align 1 %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset store i8 9, i8* %ptr0, align 1 @@ -100,7 +100,7 @@ define void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 % ; ALIGNED: store i16 9, i16* %ptr0, align 1{{$}} ; ALIGNED: store i16 10, i16* %ptr1, align 1{{$}} -define void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i16], align 1 %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset store i16 9, i16* %ptr0, align 1 @@ -119,7 +119,7 @@ define void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 ; ALIGNED: store i32 9, i32* %ptr0, align 1 ; ALIGNED: store i32 10, i32* %ptr1, align 1 -define void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i32], align 1 %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset store i32 9, i32* %ptr0, align 1 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll index 25abb98c6eb..8a75b8743fa 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll @@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; CHECK: sext i32 %id.x to i64 ; CHECK: load <2 x float> ; CHECK: store <2 x float> zeroinitializer -define void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { +define amdgpu_kernel void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { entry: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %sext.id.x = sext i32 %id.x to i64 @@ -32,7 +32,7 @@ entry: ; CHECK: zext i32 %id.x to i64 ; CHECK: load <2 x float> ; CHECK: store <2 x float> -define void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { +define amdgpu_kernel void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { entry: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %zext.id.x = zext i32 %id.x to i64 @@ -54,7 +54,7 @@ entry: ; CHECK-LABEL: @merge_op_zext_index( ; CHECK: load <2 x float> ; CHECK: store <2 x float> -define void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { +define amdgpu_kernel void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { entry: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %shl = shl i32 %id.x, 2 @@ -81,7 +81,7 @@ entry: ; CHECK-LABEL: @merge_op_sext_index( ; CHECK: load <2 x float> ; CHECK: store <2 x float> -define void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { +define amdgpu_kernel void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { entry: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %shl = shl i32 %id.x, 2 @@ -112,7 +112,7 @@ entry: ; CHECK: loop: ; CHECK: load <2 x i32> ; CHECK: store <2 x i32> -define void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 { +define amdgpu_kernel void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 { entry: %cmp0 = icmp eq i32 %n, 0 br i1 %cmp0, label %exit, label %loop diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll index 2b2f9cbcf50..6182c09abcf 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll @@ -11,7 +11,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK: load <2 x float> ; CHECK: %w = add i32 %y, 9 ; CHECK: %foo = add i32 %z, %w -define void @insert_load_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { +define amdgpu_kernel void @insert_load_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { entry: %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx @@ -38,7 +38,7 @@ entry: ; CHECK: %w = add i32 %y, 9 ; CHECK: store <2 x float> ; CHECK: %foo = add i32 %z, %w -define void @insert_store_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { +define amdgpu_kernel void @insert_store_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { entry: %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll index 4d6240a9aa9..3f6d7ee7dca 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll @@ -8,7 +8,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK: store double 0.000000e+00, double addrspace(1)* %a, ; CHECK: load double ; CHECK: store double 0.000000e+00, double addrspace(1)* %a.idx.1 -define void @interleave(double addrspace(1)* nocapture %a, double addrspace(1)* nocapture %b, double addrspace(1)* nocapture readonly %c) #0 { +define amdgpu_kernel void @interleave(double addrspace(1)* nocapture %a, double addrspace(1)* nocapture %b, double addrspace(1)* nocapture readonly %c) #0 { entry: %a.idx.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 %c.idx.1 = getelementptr inbounds double, double addrspace(1)* %c, i64 1 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll index c85be874376..0fcdc7b9083 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll @@ -17,7 +17,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; ELT8-UNALIGNED: store <2 x i32> ; ELT16-UNALIGNED: store <4 x i32> -define void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 %out.gep.3 = getelementptr i32, i32* %out, i32 3 @@ -44,7 +44,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 { ; ELT4-UNALIGNED: store i32 ; ELT4-UNALIGNED: store i32 ; ELT4-UNALIGNED: store i32 -define void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 %out.gep.3 = getelementptr i32, i32* %out, i32 3 @@ -71,7 +71,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 ; ELT4-UNALIGNED: store i32 ; ELT4-UNALIGNED: store i32 ; ELT4-UNALIGNED: store i32 -define void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 %out.gep.3 = getelementptr i32, i32* %out, i32 3 @@ -85,7 +85,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8( ; ALL: store <4 x i8> -define void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 { %out.gep.1 = getelementptr i8, i8* %out, i32 1 %out.gep.2 = getelementptr i8, i8* %out, i32 2 %out.gep.3 = getelementptr i8, i8* %out, i32 3 @@ -104,7 +104,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 { ; ALIGNED: store i8 ; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8>* %1, align 1 -define void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 { %out.gep.1 = getelementptr i8, i8* %out, i32 1 %out.gep.2 = getelementptr i8, i8* %out, i32 2 %out.gep.3 = getelementptr i8, i8* %out, i32 3 @@ -118,7 +118,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 { ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16( ; ALL: store <2 x i16> -define void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 { %out.gep.1 = getelementptr i16, i16* %out, i32 1 store i16 9, i16* %out, align 4 @@ -131,7 +131,7 @@ define void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 { ; ALIGNED: store i16 ; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 2 -define void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 { %out.gep.1 = getelementptr i16, i16* %out, i32 1 store i16 9, i16* %out, align 2 @@ -144,7 +144,7 @@ define void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 ; ALIGNED: store i16 ; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 1 -define void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 { %out.gep.1 = getelementptr i16, i16* %out, i32 1 store i16 9, i16* %out, align 1 @@ -154,7 +154,7 @@ define void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16_align8( ; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 8 -define void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 { %out.gep.1 = getelementptr i16, i16* %out, i32 1 store i16 9, i16* %out, align 8 @@ -179,7 +179,7 @@ define void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 ; ELT16-ALIGNED: store i32 ; ELT16-UNALIGNED: store <3 x i32> -define void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 @@ -202,7 +202,7 @@ define void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 { ; ELT8-UNALIGNED: store i32 ; ELT16-UNALIGNED: store <3 x i32> -define void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 @@ -218,7 +218,7 @@ define void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 ; ALIGNED: store i8 ; UNALIGNED: store <3 x i8> -define void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8* %out) #0 { +define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8* %out) #0 { %out.gep.1 = getelementptr i8, i8* %out, i8 1 %out.gep.2 = getelementptr i8, i8* %out, i8 2 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll index d32387fa2c0..dbb7068eeae 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll @@ -10,7 +10,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK-LABEL: @merge_global_store_2_constants_i8( ; CHECK: store <2 x i8> <i8 -56, i8 123>, <2 x i8> addrspace(1)* %{{[0-9]+}}, align 2 -define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 store i8 123, i8 addrspace(1)* %out.gep.1 @@ -20,7 +20,7 @@ define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_i8_natural_align ; CHECK: store <2 x i8> -define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 store i8 123, i8 addrspace(1)* %out.gep.1 @@ -30,7 +30,7 @@ define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %o ; CHECK-LABEL: @merge_global_store_2_constants_i16 ; CHECK: store <2 x i16> <i16 456, i16 123>, <2 x i16> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 123, i16 addrspace(1)* %out.gep.1 @@ -40,7 +40,7 @@ define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_0_i16 ; CHECK: store <2 x i16> zeroinitializer, <2 x i16> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 0, i16 addrspace(1)* %out.gep.1 @@ -50,7 +50,7 @@ define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_i16_natural_align ; CHECK: store <2 x i16> -define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 123, i16 addrspace(1)* %out.gep.1 @@ -60,7 +60,7 @@ define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* ; CHECK-LABEL: @merge_global_store_2_constants_half_natural_align ; CHECK: store <2 x half> -define void @merge_global_store_2_constants_half_natural_align(half addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_half_natural_align(half addrspace(1)* %out) #0 { %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1 store half 2.0, half addrspace(1)* %out.gep.1 @@ -70,7 +70,7 @@ define void @merge_global_store_2_constants_half_natural_align(half addrspace(1) ; CHECK-LABEL: @merge_global_store_2_constants_i32 ; CHECK: store <2 x i32> <i32 456, i32 123>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 store i32 123, i32 addrspace(1)* %out.gep.1 @@ -80,7 +80,7 @@ define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_i32_f32 ; CHECK: store <2 x i32> <i32 456, i32 1065353216>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)* store float 1.0, float addrspace(1)* %out.gep.1.bc @@ -90,7 +90,7 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_f32_i32 ; CHECK store <2 x float> <float 4.000000e+00, float 0x370EC00000000000>, <2 x float> addrspace(1)* %{{[0-9]+$}} -define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* store i32 123, i32 addrspace(1)* %out.gep.1.bc @@ -100,7 +100,7 @@ define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 ; CHECK-LABEL: @merge_global_store_4_constants_i32 ; CHECK: store <4 x i32> <i32 1234, i32 123, i32 456, i32 333>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -114,7 +114,7 @@ define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_4_constants_f32_order ; CHECK: store <4 x float> <float 8.000000e+00, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>, <4 x float> addrspace(1)* %{{[0-9]+}} -define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -129,7 +129,7 @@ define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) ; First store is out of order. ; CHECK-LABEL: @merge_global_store_4_constants_f32 ; CHECK: store <4 x float> <float 8.000000e+00, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>, <4 x float> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -143,7 +143,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_4_constants_mixed_i32_f32 ; CHECK: store <4 x i32> <i32 1090519040, i32 11, i32 1073741824, i32 17>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -160,7 +160,7 @@ define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %o ; CHECK-LABEL: @merge_global_store_3_constants_i32 ; CHECK: store <3 x i32> <i32 1234, i32 123, i32 456>, <3 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 @@ -172,7 +172,7 @@ define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_i64 ; CHECK: store <2 x i64> <i64 456, i64 123>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8 -define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 store i64 123, i64 addrspace(1)* %out.gep.1 @@ -183,7 +183,7 @@ define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_4_constants_i64 ; CHECK: store <2 x i64> <i64 456, i64 333>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8 ; CHECK: store <2 x i64> <i64 1234, i64 123>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8 -define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2 %out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3 @@ -202,7 +202,7 @@ define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { ; CHECK: [[INSERT0:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[ELT0]], i32 0 ; CHECK: [[INSERT1:%[^ ]+]] = insertelement <2 x i32> [[INSERT0]], i32 [[ELT1]], i32 1 ; CHECK: store <2 x i32> [[INSERT1]] -define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -220,7 +220,7 @@ define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; CHECK: insertelement ; CHECK: insertelement ; CHECK: store <2 x i32> -define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3 @@ -241,7 +241,7 @@ define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace( ; CHECK: [[INSERT0:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[ELT1]], i32 0 ; CHECK: [[INSERT1:%[^ ]+]] = insertelement <2 x i32> [[INSERT0]], i32 [[ELT0]], i32 1 ; CHECK: store <2 x i32> [[INSERT1]] -define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -256,7 +256,7 @@ define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* % ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i32 ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -279,7 +279,7 @@ define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; CHECK-LABEL: @merge_global_store_3_adjacent_loads_i32 ; CHECK: load <3 x i32> ; CHECK: store <3 x i32> -define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -298,7 +298,7 @@ define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_f32 ; CHECK: load <4 x float> ; CHECK: store <4 x float> -define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -321,7 +321,7 @@ define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, f ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i32_nonzero_base ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12 %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13 @@ -346,7 +346,7 @@ define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace( ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_inverse_i32 ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -373,7 +373,7 @@ define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* % ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_shuffle_i32 ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -408,7 +408,7 @@ define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* % ; CHECK: insertelement <4 x i8> ; CHECK: insertelement <4 x i8> ; CHECK: store <4 x i8> -define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1 %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2 %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3 @@ -431,7 +431,7 @@ define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 ad ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i8_natural_align ; CHECK: load <4 x i8> ; CHECK: store <4 x i8> -define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1 %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2 %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3 @@ -454,7 +454,7 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1 ; CHECK-LABEL: @merge_global_store_4_vector_elts_loads_v4i32 ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -474,7 +474,7 @@ define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out ; CHECK-LABEL: @merge_local_store_2_constants_i8 ; CHECK: store <2 x i8> <i8 -56, i8 123>, <2 x i8> addrspace(3)* %{{[0-9]+}}, align 2 -define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1 store i8 123, i8 addrspace(3)* %out.gep.1 @@ -484,7 +484,7 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { ; CHECK-LABEL: @merge_local_store_2_constants_i32 ; CHECK: store <2 x i32> <i32 456, i32 123>, <2 x i32> addrspace(3)* %{{[0-9]+}}, align 4 -define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 store i32 123, i32 addrspace(3)* %out.gep.1 @@ -495,7 +495,7 @@ define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { ; CHECK-LABEL: @merge_local_store_2_constants_i32_align_2 ; CHECK: store i32 ; CHECK: store i32 -define void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 store i32 123, i32 addrspace(3)* %out.gep.1, align 2 @@ -506,7 +506,7 @@ define void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) # ; CHECK-LABEL: @merge_local_store_4_constants_i32 ; CHECK: store <2 x i32> <i32 456, i32 333>, <2 x i32> addrspace(3)* ; CHECK: store <2 x i32> <i32 1234, i32 123>, <2 x i32> addrspace(3)* -define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3 @@ -521,7 +521,7 @@ define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { ; CHECK-LABEL: @merge_global_store_5_constants_i32 ; CHECK: store <4 x i32> <i32 9, i32 12, i32 16, i32 -12>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 ; CHECK: store i32 -define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { store i32 9, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 12, i32 addrspace(1)* %idx1, align 4 @@ -537,7 +537,7 @@ define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { ; CHECK-LABEL: @merge_global_store_6_constants_i32 ; CHECK: store <4 x i32> <i32 13, i32 15, i32 62, i32 63>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 ; CHECK: store <2 x i32> <i32 11, i32 123>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { store i32 13, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 15, i32 addrspace(1)* %idx1, align 4 @@ -555,7 +555,7 @@ define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { ; CHECK-LABEL: @merge_global_store_7_constants_i32 ; CHECK: store <4 x i32> <i32 34, i32 999, i32 65, i32 33>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 ; CHECK: store <3 x i32> <i32 98, i32 91, i32 212>, <3 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 999, i32 addrspace(1)* %idx1, align 4 @@ -575,7 +575,7 @@ define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { ; CHECK-LABEL: @merge_global_store_8_constants_i32 ; CHECK: store <4 x i32> <i32 34, i32 999, i32 65, i32 33>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 ; CHECK: store <4 x i32> <i32 98, i32 91, i32 212, i32 999>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 999, i32 addrspace(1)* %idx1, align 4 @@ -597,7 +597,7 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { ; CHECK-LABEL: @copy_v3i32_align4 ; CHECK: %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4 ; CHECK: store <3 x i32> %vec, <3 x i32> addrspace(1)* %out -define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4 store <3 x i32> %vec, <3 x i32> addrspace(1)* %out ret void @@ -606,7 +606,7 @@ define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> a ; CHECK-LABEL: @copy_v3i64_align4 ; CHECK: %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 ; CHECK: store <3 x i64> %vec, <3 x i64> addrspace(1)* %out -define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 store <3 x i64> %vec, <3 x i64> addrspace(1)* %out ret void @@ -615,7 +615,7 @@ define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> a ; CHECK-LABEL: @copy_v3f32_align4 ; CHECK: %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 ; CHECK: store <3 x float> -define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0> store <3 x float> %fadd, <3 x float> addrspace(1)* %out @@ -625,7 +625,7 @@ define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x floa ; CHECK-LABEL: @copy_v3f64_align4 ; CHECK: %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 ; CHECK: store <3 x double> %fadd, <3 x double> addrspace(1)* %out -define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0> store <3 x double> %fadd, <3 x double> addrspace(1)* %out diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll index 8885d61014f..226147df66a 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK-LABEL: @merge_v2i32_v2i32( ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> zeroinitializer -define void @merge_v2i32_v2i32(<2 x i32> addrspace(1)* nocapture %a, <2 x i32> addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v2i32_v2i32(<2 x i32> addrspace(1)* nocapture %a, <2 x i32> addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %b, i64 1 @@ -22,7 +22,7 @@ entry: ; CHECK-LABEL: @merge_v1i32_v1i32( ; CHECK: load <2 x i32> ; CHECK: store <2 x i32> zeroinitializer -define void @merge_v1i32_v1i32(<1 x i32> addrspace(1)* nocapture %a, <1 x i32> addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v1i32_v1i32(<1 x i32> addrspace(1)* nocapture %a, <1 x i32> addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds <1 x i32>, <1 x i32> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <1 x i32>, <1 x i32> addrspace(1)* %b, i64 1 @@ -41,7 +41,7 @@ entry: ; CHECK: load <3 x i32> ; CHECK: store <3 x i32> zeroinitializer ; CHECK: store <3 x i32> zeroinitializer -define void @no_merge_v3i32_v3i32(<3 x i32> addrspace(1)* nocapture %a, <3 x i32> addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @no_merge_v3i32_v3i32(<3 x i32> addrspace(1)* nocapture %a, <3 x i32> addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b, i64 1 @@ -58,7 +58,7 @@ entry: ; CHECK-LABEL: @merge_v2i16_v2i16( ; CHECK: load <4 x i16> ; CHECK: store <4 x i16> zeroinitializer -define void @merge_v2i16_v2i16(<2 x i16> addrspace(1)* nocapture %a, <2 x i16> addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v2i16_v2i16(<2 x i16> addrspace(1)* nocapture %a, <2 x i16> addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b, i64 1 @@ -76,7 +76,7 @@ entry: ; CHECK-LABEL: @merge_load_i32_v2i16( ; CHECK: load i32, ; CHECK: load <2 x i16> -define void @merge_load_i32_v2i16(i32 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_i32_v2i16(i32 addrspace(1)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 1 %a.1.cast = bitcast i32 addrspace(1)* %a.1 to <2 x i16> addrspace(1)* diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll index ba792f78353..f353106607d 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll @@ -7,7 +7,7 @@ ; CHECK-LABEL: @load_keep_base_alignment_missing_align( ; CHECK: load <2 x float>, <2 x float> addrspace(3)* %{{[0-9]+}}, align 4 -define void @load_keep_base_alignment_missing_align(float addrspace(1)* %out) { +define amdgpu_kernel void @load_keep_base_alignment_missing_align(float addrspace(1)* %out) { %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11 %val0 = load float, float addrspace(3)* %ptr0 @@ -21,7 +21,7 @@ define void @load_keep_base_alignment_missing_align(float addrspace(1)* %out) { ; CHECK-LABEL: @store_keep_base_alignment_missing_align( ; CHECK: store <2 x float> zeroinitializer, <2 x float> addrspace(3)* %{{[0-9]+}}, align 4 -define void @store_keep_base_alignment_missing_align() { +define amdgpu_kernel void @store_keep_base_alignment_missing_align() { %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 1 %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2 store float 0.0, float addrspace(3)* %arrayidx0 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll index 88eca363902..8a78f3d7e9b 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll @@ -11,7 +11,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64: ; CHECK: store i32 0 ; CHECK: store i32 0 -define void @no_crash(i32 %arg) { +define amdgpu_kernel void @no_crash(i32 %arg) { %tmp2 = add i32 %arg, 14 %tmp3 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %tmp2 %tmp4 = add i32 %arg, 15 @@ -37,7 +37,7 @@ define void @no_crash(i32 %arg) { ; CHECK: load i32 ; CHECK: load i32 -define void @interleave_get_longest(i32 %arg) { +define amdgpu_kernel void @interleave_get_longest(i32 %arg) { %a1 = add i32 %arg, 1 %a2 = add i32 %arg, 2 %a3 = add i32 %arg, 3 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll index 4a429533df0..818189565b4 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll @@ -5,7 +5,7 @@ ; CHECK: store i32 ; CHECK: store i32 ; CHECK: store i32 -define void @no_implicit_float(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @no_implicit_float(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll index 141e20a1f83..28d29f8e813 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll @@ -3,7 +3,7 @@ ; CHECK-LABEL: @optnone( ; CHECK: store i32 ; CHECK: store i32 -define void @optnone(i32 addrspace(1)* %out) noinline optnone { +define amdgpu_kernel void @optnone(i32 addrspace(1)* %out) noinline optnone { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 store i32 123, i32 addrspace(1)* %out.gep.1 @@ -13,7 +13,7 @@ define void @optnone(i32 addrspace(1)* %out) noinline optnone { ; CHECK-LABEL: @do_opt( ; CHECK: store <2 x i32> -define void @do_opt(i32 addrspace(1)* %out) { +define amdgpu_kernel void @do_opt(i32 addrspace(1)* %out) { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 store i32 123, i32 addrspace(1)* %out.gep.1 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll index 202e988ea5f..65200b95d5e 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll @@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; CHECK: inttoptr i64 %{{[^ ]+}} to i8 addrspace(1)* ; CHECK: inttoptr i64 %{{[^ ]+}} to i8 addrspace(1)* ; CHECK: store <2 x i64> zeroinitializer -define void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, i64 1 @@ -28,7 +28,7 @@ entry: ; CHECK: inttoptr i32 %{{[^ ]+}} to i8 addrspace(3)* ; CHECK: inttoptr i32 %{{[^ ]+}} to i8 addrspace(3)* ; CHECK: store <2 x i32> zeroinitializer -define void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i64 1 %b.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, i64 1 @@ -46,7 +46,7 @@ entry: ; CHECK: load <2 x i64> ; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1 ; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)* -define void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)* @@ -61,7 +61,7 @@ entry: ; CHECK: load <2 x i64> ; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0 ; CHECK: inttoptr i64 [[ELT0]] to i8 addrspace(1)* -define void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 { entry: %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)* %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 @@ -76,7 +76,7 @@ entry: ; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64 ; CHECK: insertelement <2 x i64> undef, i64 [[ELT0]], i32 0 ; CHECK: store <2 x i64> -define void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 { +define amdgpu_kernel void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 { entry: %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)* %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 @@ -92,7 +92,7 @@ entry: ; CHECK: [[ELT1:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64 ; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1]], i32 1 ; CHECK: store <2 x i64> -define void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 { +define amdgpu_kernel void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1 %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to i64 addrspace(1)* @@ -107,7 +107,7 @@ entry: ; CHECK: load <2 x i32> ; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 1 ; CHECK: inttoptr i32 [[ELT1]] to i8 addrspace(3)* -define void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1 %a.1.cast = bitcast i32 addrspace(3)* %a.1 to i8 addrspace(3)* addrspace(3)* @@ -122,7 +122,7 @@ entry: ; CHECK: load <2 x i32> ; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 0 ; CHECK: inttoptr i32 [[ELT0]] to i8 addrspace(3)* -define void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 { entry: %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)* %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1 @@ -137,7 +137,7 @@ entry: ; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32 ; CHECK: insertelement <2 x i32> undef, i32 [[ELT0]], i32 0 ; CHECK: store <2 x i32> -define void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 { +define amdgpu_kernel void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 { entry: %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)* %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1 @@ -152,7 +152,7 @@ entry: ; CHECK: [[ELT1:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr1 to i32 ; CHECK: insertelement <2 x i32> %{{[^ ]+}}, i32 [[ELT1]], i32 1 ; CHECK: store <2 x i32> -define void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 { +define amdgpu_kernel void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i32 1 %a.cast = bitcast i8 addrspace(3)* addrspace(3)* %a to i32 addrspace(3)* @@ -166,7 +166,7 @@ entry: ; CHECK-LABEL: @no_merge_store_ptr32_i64( ; CHECK: store i8 addrspace(3)* ; CHECK: store i64 -define void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 { +define amdgpu_kernel void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 { entry: %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)* %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 @@ -181,7 +181,7 @@ entry: ; CHECK-LABEL: @no_merge_store_i64_ptr32( ; CHECK: store i64 ; CHECK: store i8 addrspace(3)* -define void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 { +define amdgpu_kernel void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a, i64 1 %a.cast = bitcast i8 addrspace(3)* addrspace(1)* %a to i64 addrspace(1)* @@ -195,7 +195,7 @@ entry: ; CHECK-LABEL: @no_merge_load_i64_ptr32( ; CHECK: load i64, ; CHECK: load i8 addrspace(3)*, -define void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(3)* addrspace(1)* @@ -209,7 +209,7 @@ entry: ; CHECK-LABEL: @no_merge_load_ptr32_i64( ; CHECK: load i8 addrspace(3)*, ; CHECK: load i64, -define void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 { entry: %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)* %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 @@ -226,7 +226,7 @@ entry: ; CHECK: load <2 x i8 addrspace(1)*> ; CHECK: store <2 x i8 addrspace(1)*> ; CHECK: store <2 x i8 addrspace(1)*> -define void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 { +define amdgpu_kernel void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 { entry: %a.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, i64 1 @@ -245,7 +245,7 @@ entry: ; CHECK: [[ELT0_INT:%[^ ]+]] = inttoptr i64 [[ELT0]] to i8 addrspace(1)* ; CHECK: [[ELT1_INT:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1 ; CHECK: bitcast i64 [[ELT1_INT]] to double -define void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 { entry: %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)* %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 @@ -262,7 +262,7 @@ entry: ; CHECK: bitcast i64 [[ELT0]] to double ; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1 ; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)* -define void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 %a.1.cast = bitcast double addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)* @@ -279,7 +279,7 @@ entry: ; CHECK: [[ELT1_INT:%[^ ]+]] = bitcast double %val1 to i64 ; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1 ; CHECK: store <2 x i64> -define void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 { +define amdgpu_kernel void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 { entry: %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)* %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 @@ -296,7 +296,7 @@ entry: ; CHECK: [[ELT1_INT:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64 ; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1 ; CHECK: store <2 x i64> -define void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 { +define amdgpu_kernel void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1 %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to double addrspace(1)* diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/store_with_aliasing_load.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/store_with_aliasing_load.ll index d70c449e14d..63e688e63fb 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/store_with_aliasing_load.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/store_with_aliasing_load.ll @@ -9,7 +9,7 @@ ; CHECK: store <4 x float> ; Function Attrs: nounwind -define void @store_vectorize_with_alias(i8 addrspace(1)* %a, i8 addrspace(1)* %b) #0 { +define amdgpu_kernel void @store_vectorize_with_alias(i8 addrspace(1)* %a, i8 addrspace(1)* %b) #0 { bb: %tmp = bitcast i8 addrspace(1)* %b to float addrspace(1)* %tmp1 = load float, float addrspace(1)* %tmp, align 4 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll index 18f62be27c8..412d2013f6b 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll @@ -16,7 +16,7 @@ declare void @use_v2i9(<2 x i9>) ; CHECK-LABEL: @merge_store_2_constants_i1( ; CHECK: store i1 ; CHECK: store i1 -define void @merge_store_2_constants_i1(i1 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_i1(i1 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i1, i1 addrspace(1)* %out, i32 1 store i1 true, i1 addrspace(1)* %out.gep.1 store i1 false, i1 addrspace(1)* %out @@ -26,7 +26,7 @@ define void @merge_store_2_constants_i1(i1 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constants_i2( ; CHECK: store i2 1 ; CHECK: store i2 -1 -define void @merge_store_2_constants_i2(i2 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_i2(i2 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i2, i2 addrspace(1)* %out, i32 1 store i2 1, i2 addrspace(1)* %out.gep.1 store i2 -1, i2 addrspace(1)* %out @@ -36,7 +36,7 @@ define void @merge_store_2_constants_i2(i2 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_different_store_sizes_i1_i8( ; CHECK: store i1 true ; CHECK: store i8 123 -define void @merge_different_store_sizes_i1_i8(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_different_store_sizes_i1_i8(i8 addrspace(1)* %out) #0 { %out.i1 = bitcast i8 addrspace(1)* %out to i1 addrspace(1)* %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 store i1 true, i1 addrspace(1)* %out.i1 @@ -47,7 +47,7 @@ define void @merge_different_store_sizes_i1_i8(i8 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_different_store_sizes_i8_i1( ; CHECK: store i8 123 ; CHECK: store i1 true -define void @merge_different_store_sizes_i8_i1(i1 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_different_store_sizes_i8_i1(i1 addrspace(1)* %out) #0 { %out.i8 = bitcast i1 addrspace(1)* %out to i8 addrspace(1)* %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out.i8, i32 1 store i8 123, i8 addrspace(1)* %out.gep.1 @@ -58,7 +58,7 @@ define void @merge_different_store_sizes_i8_i1(i1 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constant_structs( ; CHECK: store %struct.foo ; CHECK: store %struct.foo -define void @merge_store_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { %out.gep.1 = getelementptr %struct.foo, %struct.foo addrspace(1)* %out, i32 1 store %struct.foo { i32 12, i8 3 }, %struct.foo addrspace(1)* %out.gep.1 store %struct.foo { i32 92, i8 9 }, %struct.foo addrspace(1)* %out @@ -69,7 +69,7 @@ define void @merge_store_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constants_v2i2( ; CHECK: store <2 x i2> ; CHECK: store <2 x i2> -define void @merge_store_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <2 x i2>, <2 x i2> addrspace(1)* %out, i32 1 store <2 x i2> <i2 1, i2 -1>, <2 x i2> addrspace(1)* %out.gep.1 store <2 x i2> <i2 -1, i2 1>, <2 x i2> addrspace(1)* %out @@ -81,7 +81,7 @@ define void @merge_store_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constants_v4i2( ; CHECK: store <4 x i2> ; CHECK: store <4 x i2> -define void @merge_store_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <4 x i2>, <4 x i2> addrspace(1)* %out, i32 1 store <4 x i2> <i2 1, i2 -1, i2 1, i2 -1>, <4 x i2> addrspace(1)* %out.gep.1 store <4 x i2> <i2 -1, i2 1, i2 -1, i2 1>, <4 x i2> addrspace(1)* %out @@ -91,7 +91,7 @@ define void @merge_store_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_i1( ; CHECK: load i1 ; CHECK: load i1 -define void @merge_load_2_constants_i1(i1 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_i1(i1 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i1, i1 addrspace(1)* %out, i32 1 %x = load i1, i1 addrspace(1)* %out.gep.1 %y = load i1, i1 addrspace(1)* %out @@ -103,7 +103,7 @@ define void @merge_load_2_constants_i1(i1 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_i2( ; CHECK: load i2 ; CHECK: load i2 -define void @merge_load_2_constants_i2(i2 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_i2(i2 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i2, i2 addrspace(1)* %out, i32 1 %x = load i2, i2 addrspace(1)* %out.gep.1 %y = load i2, i2 addrspace(1)* %out @@ -115,7 +115,7 @@ define void @merge_load_2_constants_i2(i2 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_different_load_sizes_i1_i8( ; CHECK: load i1 ; CHECK: load i8 -define void @merge_different_load_sizes_i1_i8(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_different_load_sizes_i1_i8(i8 addrspace(1)* %out) #0 { %out.i1 = bitcast i8 addrspace(1)* %out to i1 addrspace(1)* %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 %x = load i1, i1 addrspace(1)* %out.i1 @@ -128,7 +128,7 @@ define void @merge_different_load_sizes_i1_i8(i8 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_different_load_sizes_i8_i1( ; CHECK: load i8 ; CHECK: load i1 -define void @merge_different_load_sizes_i8_i1(i1 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_different_load_sizes_i8_i1(i1 addrspace(1)* %out) #0 { %out.i8 = bitcast i1 addrspace(1)* %out to i8 addrspace(1)* %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out.i8, i32 1 %x = load i8, i8 addrspace(1)* %out.gep.1 @@ -141,7 +141,7 @@ define void @merge_different_load_sizes_i8_i1(i1 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constant_structs( ; CHECK: load %struct.foo ; CHECK: load %struct.foo -define void @merge_load_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { %out.gep.1 = getelementptr %struct.foo, %struct.foo addrspace(1)* %out, i32 1 %x = load %struct.foo, %struct.foo addrspace(1)* %out.gep.1 %y = load %struct.foo, %struct.foo addrspace(1)* %out @@ -153,7 +153,7 @@ define void @merge_load_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_v2i2( ; CHECK: load <2 x i2> ; CHECK: load <2 x i2> -define void @merge_load_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <2 x i2>, <2 x i2> addrspace(1)* %out, i32 1 %x = load <2 x i2>, <2 x i2> addrspace(1)* %out.gep.1 %y = load <2 x i2>, <2 x i2> addrspace(1)* %out @@ -165,7 +165,7 @@ define void @merge_load_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_v4i2( ; CHECK: load <4 x i2> ; CHECK: load <4 x i2> -define void @merge_load_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <4 x i2>, <4 x i2> addrspace(1)* %out, i32 1 %x = load <4 x i2>, <4 x i2> addrspace(1)* %out.gep.1 %y = load <4 x i2>, <4 x i2> addrspace(1)* %out @@ -177,7 +177,7 @@ define void @merge_load_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constants_i9( ; CHECK: store i9 3 ; CHECK: store i9 -5 -define void @merge_store_2_constants_i9(i9 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_i9(i9 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i9, i9 addrspace(1)* %out, i32 1 store i9 3, i9 addrspace(1)* %out.gep.1 store i9 -5, i9 addrspace(1)* %out @@ -187,7 +187,7 @@ define void @merge_store_2_constants_i9(i9 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_v2i9( ; CHECK: load <2 x i9> ; CHECK: load <2 x i9> -define void @merge_load_2_constants_v2i9(<2 x i9> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_v2i9(<2 x i9> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <2 x i9>, <2 x i9> addrspace(1)* %out, i32 1 %x = load <2 x i9>, <2 x i9> addrspace(1)* %out.gep.1 %y = load <2 x i9>, <2 x i9> addrspace(1)* %out diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll index ccad351f66f..054c61d1879 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/atomics.ll @@ -17,7 +17,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; OPT: %tmp7 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 undef seq_cst ; OPT: %0 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 %tmp8 seq_cst ; OPT: br i1 %exitcond -define void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge @@ -54,7 +54,7 @@ bb: ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383 ; OPT: %tmp4 = cmpxchg i32 addrspace(3)* %scevgep4, i32 undef, i32 undef seq_cst monotonic -define void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll index bf61112a3c3..c5ea1b915d9 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll @@ -10,7 +10,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095 ; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1 -define void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge @@ -48,7 +48,7 @@ bb: ; OPT: {{^}}.lr.ph: ; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1 -define void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_global_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge @@ -83,7 +83,7 @@ bb: ; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535 ; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1 -define void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge @@ -122,7 +122,7 @@ bb: ; OPT: {{^}}.lr.ph: ; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ] ; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1 -define void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { +define amdgpu_kernel void @test_local_addressing_loop_uniform_index_max_offset_p1_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 { bb: %tmp = icmp sgt i32 %n, 0 br i1 %tmp, label %.lr.ph.preheader, label %._crit_edge diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll index 89b62632cac..02c3c05e794 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll @@ -15,7 +15,7 @@ target triple = "amdgcn--" ;CHECK: buffer_store_dword ;CHECK: s_branch [[LOOP_LABEL]] -define void @foo() { +define amdgpu_kernel void @foo() { entry: br label %loop diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll index 8c83df5843d..67b1926bdf2 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll @@ -16,7 +16,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK: bb: ; CHECK: inttoptr i32 %lsr.iv.next2 to i8 addrspace(3)* ; CHECK: %c1 = icmp ne i8 addrspace(3)* -define void @local_cmp_user(i32 %arg0) nounwind { +define amdgpu_kernel void @local_cmp_user(i32 %arg0) nounwind { entry: br label %bb11 @@ -47,7 +47,7 @@ bb13: ; CHECK: bb: ; CHECK: inttoptr i64 %lsr.iv.next2 to i8 addrspace(1)* ; CHECK: icmp ne i8 addrspace(1)* %t -define void @global_cmp_user(i64 %arg0) nounwind { +define amdgpu_kernel void @global_cmp_user(i64 %arg0) nounwind { entry: br label %bb11 @@ -78,7 +78,7 @@ bb13: ; CHECK: bb: ; CHECK: %idxprom = sext i32 %lsr.iv1 to i64 ; CHECK: getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom -define void @global_gep_user(i32 %arg0) nounwind { +define amdgpu_kernel void @global_gep_user(i32 %arg0) nounwind { entry: br label %bb11 @@ -108,7 +108,7 @@ bb13: ; CHECK: bb ; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext -define void @global_sext_scale_user(i32 %arg0) nounwind { +define amdgpu_kernel void @global_sext_scale_user(i32 %arg0) nounwind { entry: br label %bb11 diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll index b3b696d42c5..9eba0c3051d 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/preserve-addrspace-assert.ll @@ -14,7 +14,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK: %scevgep = getelementptr i32, i32 addrspace(3)* %tmp1, i32 4 ; CHECK:%tmp14 = load i32, i32 addrspace(3)* %scevgep -define void @lsr_crash_preserve_addrspace_unknown_type() #0 { +define amdgpu_kernel void @lsr_crash_preserve_addrspace_unknown_type() #0 { bb: br label %bb1 diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll index e732ddc2bc8..ca8cc32469d 100644 --- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll +++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll @@ -6,7 +6,7 @@ ; CHECK: call void @llvm.amdgcn.s.barrier() ; CHECK: call void @llvm.amdgcn.s.barrier() ; CHECK-NOT: br -define void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 { +define amdgpu_kernel void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 { entry: br label %for.body diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll index 915a13d4961..e986c3dc2a2 100644 --- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll +++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll @@ -7,7 +7,7 @@ ; CHECK: store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4 ; CHECK: ret void -define void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) { +define amdgpu_kernel void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) { entry: %arr = alloca [64 x i32], align 4 %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -40,7 +40,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: br i1 %[[exitcond]] ; CHECK-NOT: icmp eq i32 %{{.*}}, 100 -define void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) { +define amdgpu_kernel void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) { entry: %arr = alloca [64 x i32], align 4 %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -82,7 +82,7 @@ for.body6: ; preds = %for.body6, %for.con ; CHECK: icmp eq i32 %{{.*}}, 100 ; CHECK: br -define void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) { +define amdgpu_kernel void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) { entry: %arr = alloca [256 x i32], align 4 %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1 @@ -116,7 +116,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: icmp eq i32 %{{.*}}, 100 ; CHECK: br -define void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) { +define amdgpu_kernel void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) { entry: %arr = alloca i32, i32 %n, align 4 %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1 diff --git a/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll b/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll index e70467a9963..1f106bd894a 100644 --- a/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll +++ b/llvm/test/Transforms/LoopUnswitch/AMDGPU/divergent-unswitch.ll @@ -3,14 +3,14 @@ ; Check that loop unswitch happened and condition hoisted out of the loop. ; Condition is uniform so all targets should perform unswitching. -; CHECK-LABEL: {{^}}define void @uniform_unswitch +; CHECK-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch ; CHECK: entry: ; CHECK-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp ; CHECK-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456 ; CHECK-NEXT: and i1 [[LOOP_COND]], [[IF_COND]] ; CHECK-NEXT: br i1 -define void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) { +define amdgpu_kernel void @uniform_unswitch(i32 * nocapture %out, i32 %n, i32 %x) { entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup @@ -42,14 +42,14 @@ for.inc: ; preds = %for.body, %if.then ; Check that loop unswitch does not happen if condition is divergent. -; CHECK-LABEL: {{^}}define void @divergent_unswitch +; CHECK-LABEL: {{^}}define amdgpu_kernel void @divergent_unswitch ; CHECK: entry: ; CHECK: icmp ; CHECK: [[IF_COND:%[a-z0-9]+]] = icmp {{.*}} 567890 ; CHECK: br label ; CHECK: br i1 [[IF_COND]] -define void @divergent_unswitch(i32 * nocapture %out, i32 %n) { +define amdgpu_kernel void @divergent_unswitch(i32 * nocapture %out, i32 %n) { entry: %cmp9 = icmp sgt i32 %n, 0 br i1 %cmp9, label %for.body.lr.ph, label %for.cond.cleanup diff --git a/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll index 85ba95cab16..f303ed5377e 100644 --- a/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll +++ b/llvm/test/Transforms/LoopVectorize/AMDGPU/unroll-in-loop-vectorizer.ll @@ -7,7 +7,7 @@ ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: ret -define void @small_loop(i32* nocapture %inArray, i32 %size) nounwind { +define amdgpu_kernel void @small_loop(i32* nocapture %inArray, i32 %size) nounwind { entry: %0 = icmp sgt i32 %size, 0 br i1 %0, label %loop, label %exit diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll index 35763953911..63c6d77954d 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/simplebb.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:32:32:32-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32- ; Simple 3-pair chain with loads and stores -define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) { +define amdgpu_kernel void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) { ; CHECK-LABEL: @test1_as_3_3_3( ; CHECK: load <2 x double>, <2 x double> addrspace(3)* ; CHECK: load <2 x double>, <2 x double> addrspace(3)* @@ -29,7 +29,7 @@ define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, do ret void } -define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) { +define amdgpu_kernel void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) { ; CHECK-LABEL: @test1_as_3_0_0( ; CHECK: load <2 x double>, <2 x double> addrspace(3)* ; CHECK: load <2 x double>, <2 x double>* @@ -49,7 +49,7 @@ define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) { ret void } -define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) { +define amdgpu_kernel void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) { ; CHECK-LABEL: @test1_as_0_0_3( ; CHECK: load <2 x double>, <2 x double>* ; CHECK: load <2 x double>, <2 x double>* diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll index 5815ae62737..23ec0ca2554 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll @@ -9,7 +9,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1 ; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32 ; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33 -define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { +define amdgpu_kernel void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { %tmp = sext i32 %y to i64 %tmp1 = sext i32 %x to i64 %tmp2 = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %tmp1, i64 %tmp @@ -42,7 +42,7 @@ define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output) ; IR: add i32 %x, 256 ; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} ; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { +define amdgpu_kernel void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { %tmp = sext i32 %y to i64 %tmp1 = sext i32 %x to i64 %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %tmp1, i64 %tmp @@ -74,7 +74,7 @@ define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace( ; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255 ; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128 ; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383 -define void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { +define amdgpu_kernel void @sum_of_lds_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(1)* nocapture %output) { %tmp2 = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %x, i32 %y %tmp4 = load float, float addrspace(3)* %tmp2, align 4 %tmp5 = fadd float %tmp4, 0.000000e+00 diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll index f2853aca698..9554ae69031 100644 --- a/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll +++ b/llvm/test/Transforms/StraightLineStrengthReduce/AMDGPU/reassociate-geps-and-slsr-addrspace.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK-LABEL: @slsr_after_reassociate_global_geps_mubuf_max_offset( ; CHECK: [[b1:%[0-9]+]] = getelementptr float, float addrspace(1)* %arr, i64 [[bump:%[0-9]+]] ; CHECK: [[b2:%[0-9]+]] = getelementptr float, float addrspace(1)* [[b1]], i64 [[bump]] -define void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { +define amdgpu_kernel void @slsr_after_reassociate_global_geps_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { bb: %i2 = shl nsw i32 %i, 1 %j1 = add nsw i32 %i, 1023 @@ -33,7 +33,7 @@ bb: ; CHECK: %tmp = sext i32 %j1 to i64 ; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp ; CHECK: getelementptr inbounds float, float addrspace(1)* %arr, i64 %tmp5 -define void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { +define amdgpu_kernel void @slsr_after_reassociate_global_geps_over_mubuf_max_offset(float addrspace(1)* %out, float addrspace(1)* noalias %arr, i32 %i) { bb: %i2 = shl nsw i32 %i, 1 %j1 = add nsw i32 %i, 1024 @@ -61,7 +61,7 @@ bb: ; CHECK: [[B2:%[0-9]+]] = getelementptr float, float addrspace(3)* [[B1]], i32 %i ; CHECK: getelementptr inbounds float, float addrspace(3)* [[B2]], i32 16383 -define void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { +define amdgpu_kernel void @slsr_after_reassociate_lds_geps_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { bb: %i2 = shl nsw i32 %i, 1 %j1 = add nsw i32 %i, 16383 @@ -86,7 +86,7 @@ bb: ; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j1 ; CHECK: %j2 = add i32 %j1, %i ; CHECK: getelementptr inbounds float, float addrspace(3)* %arr, i32 %j2 -define void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { +define amdgpu_kernel void @slsr_after_reassociate_lds_geps_over_ds_max_offset(float addrspace(1)* %out, float addrspace(3)* noalias %arr, i32 %i) { bb: %i2 = shl nsw i32 %i, 1 %j1 = add nsw i32 %i, 16384 |