diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/merge-stores.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/merge-stores.ll | 76 |
1 files changed, 38 insertions, 38 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll index fd2d8ee1189..dfd5b97fcc8 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll @@ -13,7 +13,7 @@ ; GCN: buffer_store_byte ; GCN: buffer_store_byte ; GCN: s_endpgm -define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 store i8 123, i8 addrspace(1)* %out.gep.1 @@ -25,7 +25,7 @@ define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { ; GCN: buffer_store_byte ; GCN: buffer_store_byte ; GCN: s_endpgm -define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 store i8 123, i8 addrspace(1)* %out.gep.1 @@ -35,7 +35,7 @@ define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %o ; GCN-LABEL: {{^}}merge_global_store_2_constants_i16: ; GCN: buffer_store_dword v -define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 123, i16 addrspace(1)* %out.gep.1 @@ -45,7 +45,7 @@ define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { ; GCN-LABEL: {{^}}merge_global_store_2_constants_0_i16: ; GCN: buffer_store_dword v -define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 0, i16 addrspace(1)* %out.gep.1 @@ -57,7 +57,7 @@ define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { ; GCN: buffer_store_short ; GCN: buffer_store_short ; GCN: s_endpgm -define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 123, i16 addrspace(1)* %out.gep.1 @@ -69,7 +69,7 @@ define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* ; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 ; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} -define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 store i32 123, i32 addrspace(1)* %out.gep.1 @@ -79,7 +79,7 @@ define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { ; GCN-LABEL: {{^}}merge_global_store_2_constants_i32_f32: ; GCN: buffer_store_dwordx2 -define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)* store float 1.0, float addrspace(1)* %out.gep.1.bc @@ -91,7 +91,7 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { ; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0 ; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b ; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}} -define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* store i32 123, i32 addrspace(1)* %out.gep.1.bc @@ -105,7 +105,7 @@ define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x7b{{$}} ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x4d2{{$}} ; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]{{\]}} -define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -119,7 +119,7 @@ define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order: ; GCN: buffer_store_dwordx4 -define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -134,7 +134,7 @@ define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) ; First store is out of order. ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32: ; GCN: buffer_store_dwordx4 -define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -149,7 +149,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { ; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: ; GCN-AA: buffer_store_dwordx4 v ; GCN: s_endpgm -define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -169,7 +169,7 @@ define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %o ; SI-DAG: buffer_store_dword ; SI-NOT: buffer_store_dword ; GCN: s_endpgm -define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 @@ -181,7 +181,7 @@ define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { ; GCN-LABEL: {{^}}merge_global_store_2_constants_i64: ; GCN: buffer_store_dwordx4 -define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 store i64 123, i64 addrspace(1)* %out.gep.1 @@ -192,7 +192,7 @@ define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { ; GCN-LABEL: {{^}}merge_global_store_4_constants_i64: ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx4 -define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2 %out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3 @@ -207,7 +207,7 @@ define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { ; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32: ; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dwordx2 [[LOAD]] -define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -222,7 +222,7 @@ define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base: ; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3 @@ -241,7 +241,7 @@ define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace( ; GCN: buffer_load_dword v ; GCN: buffer_store_dword v ; GCN: buffer_store_dword v -define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -256,7 +256,7 @@ define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* % ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dwordx4 [[LOAD]] -define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -283,7 +283,7 @@ define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; SI-DAG: buffer_store_dword v ; SI-DAG: buffer_store_dwordx2 v ; GCN: s_endpgm -define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -302,7 +302,7 @@ define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_f32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dwordx4 [[LOAD]] -define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -325,7 +325,7 @@ define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, f ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 ; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 -define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12 %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13 @@ -351,7 +351,7 @@ define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace( ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: s_barrier ; GCN: buffer_store_dwordx4 [[LOAD]] -define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -388,7 +388,7 @@ define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* % ; GCN: buffer_store_dword v ; GCN: buffer_store_dword v ; GCN: buffer_store_dword v -define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -416,7 +416,7 @@ define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* % ; GCN: buffer_load_dword [[LOAD:v[0-9]+]] ; GCN: buffer_store_dword [[LOAD]] ; GCN: s_endpgm -define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1 %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2 %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3 @@ -446,7 +446,7 @@ define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 ad ; GCN: buffer_store_byte ; GCN: buffer_store_byte ; GCN: s_endpgm -define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1 %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2 %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3 @@ -470,7 +470,7 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1 ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] ; GCN: buffer_store_dwordx4 [[LOAD]] ; GCN: s_endpgm -define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -492,7 +492,7 @@ define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out ; GCN: ds_write_b8 ; GCN: ds_write_b8 ; GCN: s_endpgm -define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1 store i8 123, i8 addrspace(3)* %out.gep.1 @@ -504,7 +504,7 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}} -define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 store i32 123, i32 addrspace(3)* %out.gep.1 @@ -522,7 +522,7 @@ define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { ; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1 ; GCN: s_endpgm -define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3 @@ -540,7 +540,7 @@ define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { ; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI4]]{{\]}} ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 11{{$}} ; GCN: buffer_store_dword v[[HI]] -define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { store i32 9, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 12, i32 addrspace(1)* %idx1, align 4 @@ -556,7 +556,7 @@ define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { ; GCN-LABEL: {{^}}merge_global_store_6_constants_i32: ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx2 -define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { store i32 13, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 15, i32 addrspace(1)* %idx1, align 4 @@ -575,7 +575,7 @@ define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx2 ; GCN: buffer_store_dword v -define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 999, i32 addrspace(1)* %idx1, align 4 @@ -596,7 +596,7 @@ define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { ; GCN: buffer_store_dwordx4 ; GCN: buffer_store_dwordx4 ; GCN: s_endpgm -define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 999, i32 addrspace(1)* %idx1, align 4 @@ -630,7 +630,7 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; GCN: ScratchSize: 0{{$}} -define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4 store <3 x i32> %vec, <3 x i32> addrspace(1)* %out ret void @@ -646,7 +646,7 @@ define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> a ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN: ScratchSize: 0{{$}} -define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 store <3 x i64> %vec, <3 x i64> addrspace(1)* %out ret void @@ -662,7 +662,7 @@ define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> a ; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; GCN: ScratchSize: 0{{$}} -define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0> store <3 x float> %fadd, <3 x float> addrspace(1)* %out @@ -679,7 +679,7 @@ define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x floa ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN: ScratchSize: 0{{$}} -define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0> store <3 x double> %fadd, <3 x double> addrspace(1)* %out |