summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-05-29 19:35:00 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-05-29 19:35:00 +0000
commit1ea0402e82f6b159a8e95e29fd33472efa17169b (patch)
treed9fce38de26dd111cc57425432af70a8d469c3d5 /llvm
parent97684419e8306e8a80b93cfb56af20e73d0898a3 (diff)
downloadbcm5719-llvm-1ea0402e82f6b159a8e95e29fd33472efa17169b.tar.gz
bcm5719-llvm-1ea0402e82f6b159a8e95e29fd33472efa17169b.zip
AMDGPU: Round up kernel argument allocation size
AFAIK the driver's allocation will actually have to round this up anyway. It is useful to track the rounded up size, so that the end of the kernel segment is known to be dereferencable so a wider s_load_dword can be used for a short argument at the end of the segment. llvm-svn: 333456
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp5
-rw-r--r--llvm/test/CodeGen/AMDGPU/kernel-args.ll53
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll48
5 files changed, 111 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 6f50fca8831..bcc0e77a545 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -20,6 +20,7 @@ class AMDGPUMachineFunction : public MachineFunctionInfo {
/// local memory space.
SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
+protected:
uint64_t KernArgSize;
unsigned MaxKernArgAlign;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index a52b1137203..97fc6493b95 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -414,12 +414,16 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
unsigned ExplicitArgBytes) const {
+ uint64_t TotalSize = ExplicitArgBytes;
unsigned ImplicitBytes = getImplicitArgNumBytes(F);
- if (ImplicitBytes == 0)
- return ExplicitArgBytes;
- unsigned Alignment = getAlignmentForImplicitArgPtr();
- return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ if (ImplicitBytes != 0) {
+ unsigned Alignment = getAlignmentForImplicitArgPtr();
+ TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ }
+
+ // Being able to dereference past the end is useful for emitting scalar loads.
+ return alignTo(TotalSize, 4);
}
unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 495a8534865..61b6cb33fd1 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -71,8 +71,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
ImplicitArgPtr = true;
} else {
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
+ if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
KernargSegmentPtr = true;
+ assert(MaxKernArgAlign == 0);
+ MaxKernArgAlign = ST.getAlignmentForImplicitArgPtr();
+ }
}
CallingConv::ID CC = F.getCallingConv();
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index 8e9abb9de8b..f51366f2665 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -5,6 +5,7 @@
; RUN: llc < %s -march=r600 -mcpu=cayman -verify-machineinstrs | FileCheck -enable-var-scope --check-prefix=EG --check-prefix=FUNC %s
; FUNC-LABEL: {{^}}i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
@@ -25,6 +26,7 @@ entry:
}
; FUNC-LABEL: {{^}}i8_zext_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
@@ -44,6 +46,7 @@ entry:
}
; FUNC-LABEL: {{^}}i8_sext_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
@@ -63,7 +66,9 @@ entry:
}
; FUNC-LABEL: {{^}}i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
+
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
@@ -83,7 +88,9 @@ entry:
}
; FUNC-LABEL: {{^}}i16_zext_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
+
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
@@ -102,7 +109,9 @@ entry:
}
; FUNC-LABEL: {{^}}i16_sext_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
+
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
@@ -121,7 +130,9 @@ entry:
}
; FUNC-LABEL: {{^}}i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
+
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
@@ -133,6 +144,7 @@ entry:
}
; FUNC-LABEL: {{^}}f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
@@ -145,7 +157,9 @@ entry:
}
; FUNC-LABEL: {{^}}v2i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
+
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
@@ -159,7 +173,9 @@ entry:
}
; FUNC-LABEL: {{^}}v2i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
+
; EG: VTX_READ_16
; EG: VTX_READ_16
@@ -174,7 +190,9 @@ entry:
}
; FUNC-LABEL: {{^}}v2i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
+
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
@@ -187,7 +205,9 @@ entry:
}
; FUNC-LABEL: {{^}}v2f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
+
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
@@ -200,7 +220,9 @@ entry:
}
; FUNC-LABEL: {{^}}v3i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
+
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
@@ -217,7 +239,9 @@ entry:
}
; FUNC-LABEL: {{^}}v3i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
+
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
@@ -233,6 +257,7 @@ entry:
ret void
}
; FUNC-LABEL: {{^}}v3i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -247,6 +272,7 @@ entry:
}
; FUNC-LABEL: {{^}}v3f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -261,6 +287,7 @@ entry:
}
; FUNC-LABEL: {{^}}v4i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
@@ -281,6 +308,7 @@ entry:
}
; FUNC-LABEL: {{^}}v4i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_16
; EG: VTX_READ_16
@@ -301,6 +329,7 @@ entry:
}
; FUNC-LABEL: {{^}}v4i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -317,6 +346,7 @@ entry:
}
; FUNC-LABEL: {{^}}v4f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -332,6 +362,7 @@ entry:
}
; FUNC-LABEL: {{^}}v8i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
@@ -363,6 +394,7 @@ entry:
}
; FUNC-LABEL: {{^}}v8i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_16
; EG: VTX_READ_16
@@ -393,6 +425,7 @@ entry:
}
; FUNC-LABEL: {{^}}v8i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 64
; HSA-VI: kernarg_segment_alignment = 5
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
@@ -412,6 +445,7 @@ entry:
}
; FUNC-LABEL: {{^}}v8f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 64
; HSA-VI: kernarg_segment_alignment = 5
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
@@ -429,6 +463,7 @@ entry:
}
; FUNC-LABEL: {{^}}v16i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
@@ -485,6 +520,7 @@ entry:
}
; FUNC-LABEL: {{^}}v16i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 64
; HSA-VI: kernarg_segment_alignment = 5
; EG: VTX_READ_16
; EG: VTX_READ_16
@@ -535,6 +571,7 @@ entry:
}
; FUNC-LABEL: {{^}}v16i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 128
; HSA-VI: kernarg_segment_alignment = 6
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
@@ -562,6 +599,7 @@ entry:
}
; FUNC-LABEL: {{^}}v16f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 128
; HSA-VI: kernarg_segment_alignment = 6
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
@@ -621,6 +659,9 @@ entry:
; }
; FUNC-LABEL: {{^}}i1_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
; SI: buffer_load_ubyte
; SI: v_and_b32_e32
; SI: buffer_store_byte
@@ -631,6 +672,9 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
}
; FUNC-LABEL: {{^}}i1_arg_zext_i32:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
@@ -641,6 +685,9 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
}
; FUNC-LABEL: {{^}}i1_arg_zext_i64:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
; SI: buffer_load_ubyte
; SI: buffer_store_dwordx2
; SI: s_endpgm
@@ -651,6 +698,9 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
}
; FUNC-LABEL: {{^}}i1_arg_sext_i32:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
@@ -661,6 +711,9 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
}
; FUNC-LABEL: {{^}}i1_arg_sext_i64:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
; SI: buffer_load_ubyte
; SI: v_bfe_i32
; SI: v_ashrrev_i32
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index 772c155ea52..6c1bc9eaa76 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -4,6 +4,9 @@
; ALL-LABEL: {{^}}test:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: kernarg_segment_byte_size = 8
+; HSA: kernarg_segment_alignment = 4
+
; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
@@ -17,6 +20,10 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
}
; ALL-LABEL: {{^}}test_implicit:
+; HSA: kernarg_segment_byte_size = 8
+; OS-MESA3D: kernarg_segment_byte_size = 24
+; CO-V2: kernarg_segment_alignment = 4
+
; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
@@ -28,9 +35,12 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
ret void
}
-; ALL-LABEL: {{^}}test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 10
+; ALL-LABEL: {{^}}test_implicit_alignment:
+; HSA: kernarg_segment_byte_size = 12
; OS-MESA3D: kernarg_segment_byte_size = 28
+; CO-V2: kernarg_segment_alignment = 4
+
+
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
@@ -48,6 +58,9 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x
; ALL-LABEL: {{^}}opencl_test_implicit_alignment
; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 28
+; CO-V2: kernarg_segment_alignment = 4
+
+
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
@@ -63,7 +76,11 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out
}
; ALL-LABEL: {{^}}test_no_kernargs:
-; HSA: enable_sgpr_kernarg_segment_ptr = 1
+; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: kernarg_segment_byte_size = 0
+; OS-MESA3D: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 32
+
; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
define amdgpu_kernel void @test_no_kernargs() #1 {
%kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
@@ -74,9 +91,34 @@ define amdgpu_kernel void @test_no_kernargs() #1 {
ret void
}
+; GCN-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
+; HSA: kernarg_segment_byte_size = 48
+; OS-MESA3d: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 4
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %val = load volatile i32, i32 addrspace(4)* %arg.ptr
+ store volatile i32 %val, i32 addrspace(1)* null
+ ret void
+}
+
+; GCN-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
+; HSA: kernarg_segment_byte_size = 40
+; OS-MESA3D: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 4
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %val = load volatile i32, i32 addrspace(4)* %arg.ptr
+ store volatile i32 %val, i32 addrspace(1)* null
+ ret void
+}
+
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
+attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }
OpenPOWER on IntegriCloud