summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-28 15:52:08 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-07-28 15:52:08 +0000
commit: 9166ce86e83900d2671b66707461d05e7bf9739a (patch)
tree: cc046ec7ddb95ff6c24d879e833ef48f2db6a651
parent: 55c05e14afa4beb2f555c91c85296a18d296168c (diff)
download: bcm5719-llvm-9166ce86e83900d2671b66707461d05e7bf9739a.tar.gz
download: bcm5719-llvm-9166ce86e83900d2671b66707461d05e7bf9739a.zip
AMDGPU: Annotate implicitarg.ptr usage
We need to pass something to functions for this to work. It isn't derivable just from the kernarg segment pointer because the implicit arguments are placed after the kernel arguments.

Also fixes the missing test for the intrinsic.

llvm-svn: 309398
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp 6
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 12
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.h 1
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp 8
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll 30
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll 39
8 files changed, 90 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index c68e5861ff2..551737c1d27 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -156,8 +156,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID,
case Intrinsic::amdgcn_dispatch_id:
return "amdgpu-dispatch-id";
case Intrinsic::amdgcn_kernarg_segment_ptr:
- case Intrinsic::amdgcn_implicitarg_ptr:
return "amdgpu-kernarg-segment-ptr";
+ case Intrinsic::amdgcn_implicitarg_ptr:
+ return "amdgpu-implicitarg-ptr";
case Intrinsic::amdgcn_queue_ptr:
case Intrinsic::trap:
case Intrinsic::debugtrap:
@@ -190,7 +191,8 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
{ "amdgpu-work-group-id-z" },
{ "amdgpu-dispatch-ptr" },
{ "amdgpu-dispatch-id" },
- { "amdgpu-kernarg-segment-ptr" }
+ { "amdgpu-kernarg-segment-ptr" },
+ { "amdgpu-implicitarg-ptr" }
};
if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 389fdc9d636..2737ef9b2ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -764,7 +764,8 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
+ unsigned getKernArgSegmentSize(const MachineFunction &MF,
+ unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 56db67c20f4..9fb1bdb90f0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -899,6 +899,13 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
DAG.getConstant(Offset, SL, PtrVT));
}
+SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
+ const SDLoc &SL) const {
+ auto MFI = DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+ uint64_t Offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+ return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
+}
+
SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
@@ -3029,8 +3036,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
TRI->getPreloadedValue(MF, Reg), VT);
}
case Intrinsic::amdgcn_implicitarg_ptr: {
- unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
- return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+ if (MFI->isEntryFunction())
+ return getImplicitArgPtr(DAG, DL);
+ report_fatal_error("amdgcn.implicitarg.ptr not implemented for functions");
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
unsigned Reg
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 30482dc6a0a..b703cedf743 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -23,6 +23,7 @@ namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
+ SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
uint64_t Offset, bool Signed,
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a7c8166ff6d..04e57bedb21 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -93,11 +93,17 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// FIXME: Not really a system SGPR.
PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+ if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+ ImplicitArgPtr = true;
+ } else {
+ if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+ KernargSegmentPtr = true;
}
CallingConv::ID CC = F->getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- KernargSegmentPtr = !F->arg_empty();
+ if (!F->arg_empty())
+ KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4c7f38a09a4..8511403ebc3 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -186,6 +186,10 @@ private:
// Other shaders indirect 64-bits at sgpr[0:1]
bool ImplicitBufferPtr : 1;
+ // Pointer to where the ABI inserts special kernel arguments separate from the
+ // user arguments. This is an offset from the KernargSegmentPtr.
+ bool ImplicitArgPtr : 1;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -346,6 +350,10 @@ public:
return WorkItemIDZ;
}
+ bool hasImplicitArgPtr() const {
+ return ImplicitArgPtr;
+ }
+
bool hasImplicitBufferPtr() const {
return ImplicitBufferPtr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index e9797eff712..51cd6c43e03 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -237,52 +237,59 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 {
ret void
}
-; HSA: define void @use_implicitarg_ptr() #14 {
+; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 {
+define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
+ %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_implicitarg_ptr() #15 {
define void @use_implicitarg_ptr() #1 {
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
ret void
}
-; HSA: define void @func_indirect_use_implicitarg_ptr() #14 {
+; HSA: define void @func_indirect_use_implicitarg_ptr() #15 {
define void @func_indirect_use_implicitarg_ptr() #1 {
call void @use_implicitarg_ptr()
ret void
}
-; HSA: declare void @external.func() #15
+; HSA: declare void @external.func() #16
declare void @external.func() #3
-; HSA: define internal void @defined.func() #15 {
+; HSA: define internal void @defined.func() #16 {
define internal void @defined.func() #3 {
ret void
}
-; HSA: define void @func_call_external() #15 {
+; HSA: define void @func_call_external() #16 {
define void @func_call_external() #3 {
call void @external.func()
ret void
}
-; HSA: define void @func_call_defined() #15 {
+; HSA: define void @func_call_defined() #16 {
define void @func_call_defined() #3 {
call void @defined.func()
ret void
}
-; HSA: define void @func_call_asm() #15 {
+; HSA: define void @func_call_asm() #16 {
define void @func_call_asm() #3 {
call void asm sideeffect "", ""() #3
ret void
}
-; HSA: define amdgpu_kernel void @kern_call_external() #16 {
+; HSA: define amdgpu_kernel void @kern_call_external() #17 {
define amdgpu_kernel void @kern_call_external() #3 {
call void @external.func()
ret void
}
-; HSA: define amdgpu_kernel void @func_kern_defined() #16 {
+; HSA: define amdgpu_kernel void @func_kern_defined() #17 {
define amdgpu_kernel void @func_kern_defined() #3 {
call void @defined.func()
ret void
@@ -308,5 +315,6 @@ attributes #3 = { nounwind }
; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
-; HSA: attributes #15 = { nounwind }
-; HSA: attributes #16 = { nounwind "amdgpu-flat-scratch" }
+; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
+; HSA: attributes #16 = { nounwind }
+; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
new file mode 100644
index 00000000000..7edb8533672
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-NOENV %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-OPENCL %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
+
+; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
+; GCN: enable_sgpr_kernarg_segment_ptr = 1
+
+; HSA-NOENV: kernarg_segment_byte_size = 0
+; HSA-OPENCL: kernarg_segment_byte_size = 32
+; MESA: kernarg_segment_byte_size = 16
+
+; HSA: s_load_dword s0, s[4:5], 0x0
+define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
+ %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
+ %load = load volatile i32, i32 addrspace(2)* %cast
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
+; GCN: enable_sgpr_kernarg_segment_ptr = 1
+
+; HSA-NOENV: kernarg_segment_byte_size = 112
+; HSA-OPENCL: kernarg_segment_byte_size = 144
+; MESA: kernarg_segment_byte_size = 464
+
+; HSA: s_load_dword s0, s[4:5], 0x1c
+define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
+ %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
+ %load = load volatile i32, i32 addrspace(2)* %cast
+ ret void
+}
+
+declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #2
+
+attributes #0 = { nounwind noinline }
+attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind readnone speculatable }
OpenPOWER on IntegriCloud