diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-12 20:12:17 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-12 20:12:17 +0000 |
commit | 51a05d72ae2969b290d1565c9af1b8f6995ed9ea (patch) | |
tree | c90d92b5fa2baad233da30098dec6e45663bd4cd | |
parent | ae79a2c3900cf0d03f0f4113a72c3f3927409bb8 (diff) | |
download | bcm5719-llvm-51a05d72ae2969b290d1565c9af1b8f6995ed9ea.tar.gz bcm5719-llvm-51a05d72ae2969b290d1565c9af1b8f6995ed9ea.zip |
AMDGPU: Drop remnants of byval support for shaders
Before 2018, mesa used to use byval interchangably with inreg, which
didn't really make sense. Fix tests still using it to avoid breaking
in a future commit.
llvm-svn: 365953
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/default-fp-mode.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/mubuf.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/ret.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/ret_jump.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/sgpr-copy.ll | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-scheduler.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/split-smrd.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/wait.ll | 2 |
14 files changed, 31 insertions, 40 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 69c6a3361de..3eb1b1c9106 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1552,8 +1552,7 @@ static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits, // First check if it's a PS input addr. if (CallConv == CallingConv::AMDGPU_PS && - !Arg->Flags.isInReg() && !Arg->Flags.isByVal() && PSInputNum <= 15) { - + !Arg->Flags.isInReg() && PSInputNum <= 15) { bool SkipArg = !Arg->Used && !Info->isPSInputAllocated(PSInputNum); // Inconveniently only the first part of the split is marked as isSplit, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 557ce169c19..44b06b2235e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -17,14 +17,6 @@ define amdgpu_vs void @test_f32(float %arg0) { ret void } -; CHECK-LABEL: name: test_ptr2_byval -; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3 -; CHECK: G_LOAD [[S01]] -define amdgpu_vs void @test_ptr2_byval(i32 addrspace(4)* byval %arg0) { - %tmp0 = load volatile i32, i32 addrspace(4)* %arg0 - ret void -} - ; CHECK-LABEL: name: test_ptr2_inreg ; CHECK: [[S01:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3 ; CHECK: G_LOAD [[S01]] diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll index 94d518e63ce..fb6491937e5 100644 --- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll +++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll @@ -95,7 +95,7 @@ main_body: ; GCN-LABEL: {{^}}kill_vcc_implicit_def: ; GCN: IeeeMode: 0 -define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* byval, [17 x <16 x i8>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [34 x <8 x i32>] addrspace(4)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) { +define amdgpu_ps float @kill_vcc_implicit_def([6 x <16 x i8>] addrspace(4)* inreg, [17 x <16 x i8>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [34 x <8 x i32>] addrspace(4)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) { entry: %tmp0 = fcmp olt float %13, 0.0 call void @llvm.amdgcn.kill(i1 %tmp0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll index 16a463d6d83..91e52d7cecb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll @@ -185,7 +185,7 @@ bb: ; GCN-LABEL: {{^}}v_interp_p1_bank16_bug: ; 16BANK-NOT: v_interp_p1_f32{{(_e32)*}} [[DST:v[0-9]+]], [[DST]] -define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg13, [17 x <4 x i32>] addrspace(4)* byval %arg14, [34 x <8 x i32>] addrspace(4)* byval %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 { +define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(4)* inreg %arg, [17 x <16 x i8>] addrspace(4)* inreg %arg13, [17 x <4 x i32>] addrspace(4)* inreg %arg14, [34 x <8 x i32>] addrspace(4)* inreg %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 { main_body: %i.i = extractelement <2 x i32> %arg19, i32 0 %j.i = extractelement <2 x i32> %arg19, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll index 9cc48b41d6d..b28e8fa72ec 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll @@ -55,7 +55,7 @@ entry: ; CHECK-LABEL: {{^}}soffset_max_imm: ; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc -define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [16 x <4 x i32>] addrspace(4)* byval, [32 x <8 x i32>] addrspace(4)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { +define amdgpu_gs void @soffset_max_imm([6 x <4 x i32>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [16 x <4 x i32>] addrspace(4)* inreg, [32 x <8 x i32>] addrspace(4)* inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { main_body: %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0 %tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0 @@ -74,7 +74,7 @@ main_body: ; CHECK-LABEL: {{^}}soffset_no_fold: ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41 ; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc -define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* byval, [17 x <4 x i32>] addrspace(4)* byval, [16 x <4 x i32>] addrspace(4)* byval, [32 x <8 x i32>] addrspace(4)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { +define amdgpu_gs void @soffset_no_fold([6 x <4 x i32>] addrspace(4)* inreg, [17 x <4 x i32>] addrspace(4)* inreg, [16 x <4 x i32>] addrspace(4)* inreg, [32 x <8 x i32>] addrspace(4)* inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) { main_body: %tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0 %tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0 diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll index b7d202638db..e77e2122068 100644 --- a/llvm/test/CodeGen/AMDGPU/ret.ll +++ b/llvm/test/CodeGen/AMDGPU/ret.ll @@ -7,7 +7,7 @@ ; GCN: s_waitcnt expcnt(0) ; GCN: v_add_f32_e32 v0, 1.0, v1 ; GCN-NOT: s_endpgm -define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { +define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 %x = fadd float %arg3, 1.000000e+00 @@ -25,7 +25,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v3, -1.0 ; GCN-DAG: s_waitcnt expcnt(0) ; GCN-NOT: s_endpgm -define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { +define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 } @@ -42,7 +42,7 @@ bb: ; GCN: v_mov_b32_e32 v3, v4 ; GCN: v_mov_b32_e32 v4, v6 ; GCN-NOT: s_endpgm -define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { +define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { bb: %i0 = extractelement <2 x i32> %arg4, i32 0 %i1 = extractelement <2 x i32> %arg4, i32 1 @@ -67,7 +67,7 @@ bb: ; GCN-LABEL: {{^}}ps_input_ena_no_inputs: ; GCN: v_mov_b32_e32 v0, 1.0 ; GCN-NOT: s_endpgm -define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { +define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { bb: ret float 1.000000e+00 } @@ -81,7 +81,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v1, v2 ; GCN: v_mov_b32_e32 v2, v3 ; GCN-NOT: s_endpgm -define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { +define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 { bb: %f = bitcast <2 x i32> %arg8 to <2 x float> %s = insertvalue { float, <2 x float> } undef, float %arg14, 0 @@ -100,7 +100,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v3, v6 ; GCN-DAG: v_mov_b32_e32 v4, v8 ; GCN-NOT: s_endpgm -define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 { +define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 { bb: %i0 = extractelement <2 x i32> %arg4, i32 0 %i1 = extractelement <2 x i32> %arg4, i32 1 @@ -129,7 +129,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v3, v8 ; GCN-DAG: v_mov_b32_e32 v4, v12 ; GCN-NOT: s_endpgm -define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 { +define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 { bb: %i0 = extractelement <2 x i32> %arg4, i32 0 %i1 = extractelement <2 x i32> %arg4, i32 1 @@ -158,7 +158,7 @@ bb: ; GCN: v_mov_b32_e32 v3, v4 ; GCN: v_mov_b32_e32 v4, v8 ; GCN-NOT: s_endpgm -define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 { +define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 { bb: %i0 = extractelement <2 x i32> %arg4, i32 0 %i1 = extractelement <2 x i32> %arg4, i32 1 @@ -180,7 +180,7 @@ bb: ; GCN: s_mov_b32 s2, s3 ; GCN: s_add_i32 s0, s3, 2 ; GCN-NOT: s_endpgm -define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { +define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: %x = add i32 %arg2, 2 %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0 @@ -196,7 +196,7 @@ bb: ; GCN-DAG: s_mov_b32 s2, 7 ; GCN-DAG: s_mov_b32 s3, 8 ; GCN-NOT: s_endpgm -define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { +define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: %x = add i32 %arg2, 2 ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 } @@ -211,7 +211,7 @@ bb: ; GCN-DAG: s_add_i32 s0, s3, 2 ; GCN-DAG: s_mov_b32 s2, s3 ; GCN-NOT: s_endpgm -define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { +define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 %v = fadd float %arg3, 1.000000e+00 @@ -233,7 +233,7 @@ bb: ; GCN-DAG: v_mov_b32_e32 v1, 2.0 ; GCN-DAG: v_mov_b32_e32 v2, 4.0 ; GCN-DAG: s_waitcnt expcnt(0) -define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { +define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 { bb: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0 ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } } diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll index 894c9c9670e..ffa851919f7 100644 --- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll +++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll @@ -22,7 +22,7 @@ ; GCN-NEXT: [[RET_BB]]: ; GCN-NEXT: ; return ; GCN-NEXT: .Lfunc_end0 -define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable([9 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [17 x <8 x i32>] addrspace(4)* byval %arg2, i32 addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 { +define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_trivial_ret_divergent_br_trivial_unreachable([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <8 x i32>] addrspace(4)* inreg %arg2, i32 addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, i32 inreg %arg17, i32 %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 { entry: %i.i = extractelement <2 x i32> %arg7, i32 0 %j.i = extractelement <2 x i32> %arg7, i32 1 @@ -73,7 +73,7 @@ ret.bb: ; preds = %else, %main_body ; GCN-NEXT: s_waitcnt ; GCN-NEXT: ; return ; GCN-NEXT: .Lfunc_end -define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [17 x <8 x i32>] addrspace(4)* byval %arg2, i32 addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 { +define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <8 x i32>] addrspace(4)* inreg %arg2, i32 addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 { main_body: %i.i = extractelement <2 x i32> %arg7, i32 0 %j.i = extractelement <2 x i32> %arg7, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll index 574ce5f6c88..cac9163aa84 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll @@ -221,7 +221,7 @@ ENDIF: ; preds = %LOOP ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}} ; CHECK: exp ; CHECK: s_endpgm -define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { +define amdgpu_ps void @sample_v3([17 x <4 x i32>] addrspace(4)* inreg %arg, [32 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <8 x i32>] addrspace(4)* inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { entry: %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0 %tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0 @@ -285,7 +285,7 @@ endif: ; preds = %if1, %if0, %entry ; This test is just checking that we don't crash / assertion fail. ; CHECK-LABEL: {{^}}copy2: ; CHECK: s_endpgm -define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { +define amdgpu_ps void @copy2([17 x <4 x i32>] addrspace(4)* inreg %arg, [32 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <8 x i32>] addrspace(4)* inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { entry: br label %LOOP68 @@ -321,7 +321,7 @@ ENDIF69: ; preds = %LOOP68 ; [[END]]: ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}} ; CHECK: s_endpgm -define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <4 x i32>] addrspace(4)* byval %arg2, [32 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 { +define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <4 x i32>] addrspace(4)* inreg %arg2, [32 x <8 x i32>] addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 { bb: %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i32 0, i32 0 %tmp22 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !3 @@ -371,7 +371,7 @@ bb71: ; preds = %bb80, %bb38 ; Check the resource descriptor is stored in an sgpr. ; CHECK-LABEL: {{^}}mimg_srsrc_sgpr: ; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 -define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(4)* byval %arg) #0 { +define amdgpu_ps void @mimg_srsrc_sgpr([34 x <8 x i32>] addrspace(4)* inreg %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid @@ -386,7 +386,7 @@ bb: ; Check the sampler is stored in an sgpr. ; CHECK-LABEL: {{^}}mimg_ssamp_sgpr: ; CHECK: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 -define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(4)* byval %arg) #0 { +define amdgpu_ps void @mimg_ssamp_sgpr([17 x <4 x i32>] addrspace(4)* inreg %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid diff --git a/llvm/test/CodeGen/AMDGPU/si-scheduler.ll b/llvm/test/CodeGen/AMDGPU/si-scheduler.ll index 76bfa178653..b76eb7c3faf 100644 --- a/llvm/test/CodeGen/AMDGPU/si-scheduler.ll +++ b/llvm/test/CodeGen/AMDGPU/si-scheduler.ll @@ -17,7 +17,7 @@ ; CHECK: s_waitcnt vmcnt(0) ; CHECK: exp ; CHECK: s_endpgm -define amdgpu_ps void @main([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 { +define amdgpu_ps void @main([6 x <16 x i8>] addrspace(4)* inreg %arg, [17 x <16 x i8>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 { main_body: %tmp = bitcast [34 x <8 x i32>] addrspace(4)* %arg3 to <32 x i8> addrspace(4)* %tmp22 = load <32 x i8>, <32 x i8> addrspace(4)* %tmp, align 32, !tbaa !0 diff --git a/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll index 54b4b4d2586..cfe05e8e654 100644 --- a/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll @@ -19,7 +19,7 @@ ; GCN: s_endpgm ; TOVGPR: ScratchSize: 0{{$}} -define amdgpu_ps void @main([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) { +define amdgpu_ps void @main([17 x <4 x i32>] addrspace(4)* inreg %arg, [32 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <8 x i32>] addrspace(4)* inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) { main_body: %lds = inttoptr i32 0 to [64 x i32] addrspace(3)* %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0 @@ -639,7 +639,7 @@ ENDIF66: ; preds = %LOOP65 ; GCN-LABEL: {{^}}main1: ; GCN: s_endpgm ; TOVGPR: ScratchSize: 0{{$}} -define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <8 x i32>] addrspace(4)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { +define amdgpu_ps void @main1([17 x <4 x i32>] addrspace(4)* inreg %arg, [32 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <8 x i32>] addrspace(4)* inreg %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 { main_body: %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i64 0, i32 0 %tmp21 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0 diff --git a/llvm/test/CodeGen/AMDGPU/split-smrd.ll b/llvm/test/CodeGen/AMDGPU/split-smrd.ll index 8fa272762ce..a79f41ae10b 100644 --- a/llvm/test/CodeGen/AMDGPU/split-smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/split-smrd.ll @@ -6,7 +6,7 @@ ; GCN-LABEL: {{^}}split_smrd_add_worklist: ; GCN: image_sample v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] dmask:0x1 -define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(4)* byval %arg) #0 { +define amdgpu_ps void @split_smrd_add_worklist([34 x <8 x i32>] addrspace(4)* inreg %arg) #0 { bb: %tmp = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> undef, i32 96, i32 0) %tmp1 = bitcast float %tmp to i32 diff --git a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll index 56fc08611aa..e4051e4d647 100644 --- a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll @@ -9,7 +9,7 @@ ; ; Check for a valid output. ; CHECK: image_sample_c -define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg, [16 x <16 x i8>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg1, [32 x <8 x i32>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg2, [16 x <8 x i32>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg3, [16 x <4 x i32>] addrspace(4)* byval dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 { +define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(4)* inreg dereferenceable(18446744073709551615) %arg, [16 x <16 x i8>] addrspace(4)* inreg dereferenceable(18446744073709551615) %arg1, [32 x <8 x i32>] addrspace(4)* inreg dereferenceable(18446744073709551615) %arg2, [16 x <8 x i32>] addrspace(4)* inreg dereferenceable(18446744073709551615) %arg3, [16 x <4 x i32>] addrspace(4)* inreg dereferenceable(18446744073709551615) %arg4, float inreg %arg5, i32 inreg %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, <3 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, <2 x i32> %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, i32 %arg20, float %arg21, i32 %arg22) #0 { main_body: %i.i = extractelement <2 x i32> %arg8, i32 0 %j.i = extractelement <2 x i32> %arg8, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll index 6fb3ed41681..4a94ce1fc55 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -27,7 +27,7 @@ ; GCN: NumVgprs: 256 ; GCN: ScratchSize: 1536 -define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* byval %arg, [17 x <4 x i32>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, [16 x <4 x i32>] addrspace(4)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { +define amdgpu_vs void @main([9 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <4 x i32>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { bb: %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i64 0, i64 0 %tmp11 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, align 16, !tbaa !0 diff --git a/llvm/test/CodeGen/AMDGPU/wait.ll b/llvm/test/CodeGen/AMDGPU/wait.ll index b45874e1e4c..12dc7cc7a13 100644 --- a/llvm/test/CodeGen/AMDGPU/wait.ll +++ b/llvm/test/CodeGen/AMDGPU/wait.ll @@ -46,7 +46,7 @@ main_body: ; ILPMAX: exp pos0 ; ILPMAX-NEXT: exp param0 ; ILPMAX: s_endpgm -define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(4)* byval %arg, [17 x <16 x i8>] addrspace(4)* byval %arg1, [17 x <4 x i32>] addrspace(4)* byval %arg2, [34 x <8 x i32>] addrspace(4)* byval %arg3, [16 x <16 x i8>] addrspace(4)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { +define amdgpu_vs void @main2([6 x <16 x i8>] addrspace(4)* inreg %arg, [17 x <16 x i8>] addrspace(4)* inreg %arg1, [17 x <4 x i32>] addrspace(4)* inreg %arg2, [34 x <8 x i32>] addrspace(4)* inreg %arg3, [16 x <16 x i8>] addrspace(4)* inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { main_body: %tmp = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(4)* %arg4, i64 0, i64 0 %tmp11 = load <16 x i8>, <16 x i8> addrspace(4)* %tmp, align 16, !tbaa !0 |