| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-03-21 21:39:51 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-03-21 21:39:51 +0000 |
| commit | 3dbeefa978fb7e7b231b249f9cd90c67b9e83277 | |
| tree | d74bf7fe30e44588d573919f3625edacb2586112 | |
| parent | f6021ecddc73d14c94ad70938250d58f330795be | |
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel
Currently, functions with the default C calling convention are treated
the same as compute kernels. Make this explicit so that the default
calling convention can later be changed to a non-kernel convention.
Converted with `perl -pi -e 's/define void/define amdgpu_kernel void/'`
over the relevant test directories (and undone in the one place that
actually wanted a non-kernel).
llvm-svn: 298444
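For context, here is a minimal before/after sketch of what the rewrite does to a test function. The function name and body are hypothetical illustrations, not taken from this commit:

```llvm
; Before: the define picks up the default C calling convention, which the
; AMDGPU backend currently treats as a compute kernel entry point.
define void @example(i32 addrspace(1)* %out) {
entry:
  store i32 0, i32 addrspace(1)* %out
  ret void
}

; After the rewrite: the kernel calling convention is explicit, so the test
; keeps its meaning even if the default convention later becomes a non-kernel.
define amdgpu_kernel void @example(i32 addrspace(1)* %out) {
entry:
  store i32 0, i32 addrspace(1)* %out
  ret void
}
```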
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/salu-to-valu.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/salu-to-valu.ll | 36 |
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
index 14369a8953f..6e1dd163833 100644
--- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -24,7 +24,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() #0
 ; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}
 ; GCN-HSA: flat_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}
-define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
+define amdgpu_kernel void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = call i32 @llvm.amdgcn.workitem.id.y()
@@ -65,7 +65,7 @@ done: ; preds = %loop
 ; GCN: v_mov_b32_e32 [[V_OUT:v[0-9]+]], [[OUT]]
 ; GCN-NOHSA: buffer_store_dword [[V_OUT]]
 ; GCN-HSA: flat_store_dword {{.*}}, [[V_OUT]]
-define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 {
 entry:
   %tmp = icmp ne i32 %a, 0
   br i1 %tmp, label %if, label %else
@@ -93,7 +93,7 @@ endif: ; preds = %else, %if
 ; GCN-NOHSA-NOT: v_add
 ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}}
 ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
-define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = add i32 %tmp, 4
@@ -113,7 +113,7 @@ entry:
 ; GCN-NOHSA: buffer_store_dword
 ; GCN-HSA: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}
-define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp
@@ -133,7 +133,7 @@ entry:
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; GCN-NOHSA: buffer_store_dwordx2
 ; GCN-HSA: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp
@@ -155,7 +155,7 @@ entry:
 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; GCN-NOHSA: buffer_store_dwordx4
 ; GCN-HSA: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
-define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp
@@ -189,7 +189,7 @@ entry:
 ; GCN-NOHSA: buffer_store_dwordx4
 ; GCN-HSA: flat_load_dwordx4
 ; GCN-HSA: flat_load_dwordx4
-define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp
@@ -230,7 +230,7 @@ entry:
 ; GCN-HSA: flat_load_dwordx4
 ; GCN: s_endpgm
-define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
+define amdgpu_kernel void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp
@@ -247,7 +247,7 @@ entry:
 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]]
 ; GCN-NOHSA: buffer_store_dword [[ADD]]
 ; GCN-HSA: flat_store_dword {{.*}}, [[ADD]]
-define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
+define amdgpu_kernel void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = add i32 %tmp, 4
@@ -261,7 +261,7 @@ entry:
 ; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset:
 ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}}
 ; GCN-HSA flat_load_dword v{{[0-9]}}, v{{[0-9]+:[0-9]+}}
-define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = add i32 %tmp, 4
@@ -275,7 +275,7 @@ entry:
 ; GCN-NOHSA-NOT: v_add
 ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}}
 ; GCN-HSA: flat_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}]
-define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
+define amdgpu_kernel void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 {
 entry:
   %tmp = call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = add i32 %tmp, 4
@@ -290,7 +290,7 @@ entry:
 ; GCN-NOHSA: buffer_load_dwordx4
 ; GCN-HSA: flat_load_dwordx4
 ; GCN-HSA: flat_load_dwordx4
-define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
 entry:
   %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -313,7 +313,7 @@ entry:
 ; GCN-NOHSA: buffer_store_dword
 ; GCN-HSA: flat_load_dwordx4
 ; GCN-HSA: flat_load_dwordx4
-define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
 entry:
   %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -350,7 +350,7 @@ entry:
 ; GCN-HSA: flat_load_dwordx4
 ; GCN-HSA: flat_load_dwordx4
 ; GCN-HSA: flat_load_dwordx4
-define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
 entry:
   %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -385,7 +385,7 @@ entry:
 ; GCN-HSA: flat_load_dwordx4
 ; GCN-HSA: flat_load_dwordx4
 ; GCN-HSA: flat_load_dwordx4
-define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
+define amdgpu_kernel void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 {
 entry:
   %tmp0 = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
@@ -441,7 +441,7 @@ entry:
 ; GCN-HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[ONE]]
 ; GCN: {{^}}[[EXIT]]:
 ; GCN: s_endpgm
-define void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+define amdgpu_kernel void @sopc_vopc_legalize_bug(i32 %cond, i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 bb3: ; preds = %bb2
   %tmp0 = bitcast i32 %cond to float
   %tmp1 = fadd float %tmp0, 2.500000e-01
@@ -459,7 +459,7 @@ bb7: ; preds = %bb3
 ; GCN-LABEL: {{^}}phi_visit_order:
 ; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 1, v{{[0-9]+}}
-define void @phi_visit_order() {
+define amdgpu_kernel void @phi_visit_order() {
 bb:
   br label %bb1
@@ -484,7 +484,7 @@ bb4:
 ; GCN: [[LOOP_LABEL:[0-9a-zA-Z_]+]]:
 ; GCN: s_xor_b32 [[B]], [[B]], [[A]]
 ; GCN: s_cbranch_scc{{[01]}} [[LOOP_LABEL]]
-define void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) {
+define amdgpu_kernel void @phi_imm_in_sgprs(i32 addrspace(3)* %out, i32 %cond) {
 entry:
   br label %loop

