summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h1
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h8
-rw-r--r--llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll30
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll39
8 files changed, 90 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index c68e5861ff2..551737c1d27 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -156,8 +156,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID,
case Intrinsic::amdgcn_dispatch_id:
return "amdgpu-dispatch-id";
case Intrinsic::amdgcn_kernarg_segment_ptr:
- case Intrinsic::amdgcn_implicitarg_ptr:
return "amdgpu-kernarg-segment-ptr";
+ case Intrinsic::amdgcn_implicitarg_ptr:
+ return "amdgpu-implicitarg-ptr";
case Intrinsic::amdgcn_queue_ptr:
case Intrinsic::trap:
case Intrinsic::debugtrap:
@@ -190,7 +191,8 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
{ "amdgpu-work-group-id-z" },
{ "amdgpu-dispatch-ptr" },
{ "amdgpu-dispatch-id" },
- { "amdgpu-kernarg-segment-ptr" }
+ { "amdgpu-kernarg-segment-ptr" },
+ { "amdgpu-implicitarg-ptr" }
};
if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 389fdc9d636..2737ef9b2ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -764,7 +764,8 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
+ unsigned getKernArgSegmentSize(const MachineFunction &MF,
+ unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 56db67c20f4..9fb1bdb90f0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -899,6 +899,13 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
DAG.getConstant(Offset, SL, PtrVT));
}
+SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
+ const SDLoc &SL) const {
+ auto MFI = DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+ uint64_t Offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+ return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
+}
+
SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
@@ -3029,8 +3036,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
TRI->getPreloadedValue(MF, Reg), VT);
}
case Intrinsic::amdgcn_implicitarg_ptr: {
- unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
- return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+ if (MFI->isEntryFunction())
+ return getImplicitArgPtr(DAG, DL);
+ report_fatal_error("amdgcn.implicitarg.ptr not implemented for functions");
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
unsigned Reg
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 30482dc6a0a..b703cedf743 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -23,6 +23,7 @@ namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
+ SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
uint64_t Offset, bool Signed,
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a7c8166ff6d..04e57bedb21 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -93,11 +93,17 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// FIXME: Not really a system SGPR.
PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+ if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+ ImplicitArgPtr = true;
+ } else {
+ if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+ KernargSegmentPtr = true;
}
CallingConv::ID CC = F->getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- KernargSegmentPtr = !F->arg_empty();
+ if (!F->arg_empty())
+ KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
} else if (CC == CallingConv::AMDGPU_PS) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4c7f38a09a4..8511403ebc3 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -186,6 +186,10 @@ private:
// Other shaders indirect 64-bits at sgpr[0:1]
bool ImplicitBufferPtr : 1;
+ // Pointer to where the ABI inserts special kernel arguments separate from the
+ // user arguments. This is an offset from the KernargSegmentPtr.
+ bool ImplicitArgPtr : 1;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -346,6 +350,10 @@ public:
return WorkItemIDZ;
}
+ bool hasImplicitArgPtr() const {
+ return ImplicitArgPtr;
+ }
+
bool hasImplicitBufferPtr() const {
return ImplicitBufferPtr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index e9797eff712..51cd6c43e03 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -237,52 +237,59 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 {
ret void
}
-; HSA: define void @use_implicitarg_ptr() #14 {
+; HSA: define amdgpu_kernel void @kern_use_implicitarg_ptr() #15 {
+define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
+ %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
+ ret void
+}
+
+; HSA: define void @use_implicitarg_ptr() #15 {
define void @use_implicitarg_ptr() #1 {
%implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
store volatile i8 addrspace(2)* %implicitarg.ptr, i8 addrspace(2)* addrspace(1)* undef
ret void
}
-; HSA: define void @func_indirect_use_implicitarg_ptr() #14 {
+; HSA: define void @func_indirect_use_implicitarg_ptr() #15 {
define void @func_indirect_use_implicitarg_ptr() #1 {
call void @use_implicitarg_ptr()
ret void
}
-; HSA: declare void @external.func() #15
+; HSA: declare void @external.func() #16
declare void @external.func() #3
-; HSA: define internal void @defined.func() #15 {
+; HSA: define internal void @defined.func() #16 {
define internal void @defined.func() #3 {
ret void
}
-; HSA: define void @func_call_external() #15 {
+; HSA: define void @func_call_external() #16 {
define void @func_call_external() #3 {
call void @external.func()
ret void
}
-; HSA: define void @func_call_defined() #15 {
+; HSA: define void @func_call_defined() #16 {
define void @func_call_defined() #3 {
call void @defined.func()
ret void
}
-; HSA: define void @func_call_asm() #15 {
+; HSA: define void @func_call_asm() #16 {
define void @func_call_asm() #3 {
call void asm sideeffect "", ""() #3
ret void
}
-; HSA: define amdgpu_kernel void @kern_call_external() #16 {
+; HSA: define amdgpu_kernel void @kern_call_external() #17 {
define amdgpu_kernel void @kern_call_external() #3 {
call void @external.func()
ret void
}
-; HSA: define amdgpu_kernel void @func_kern_defined() #16 {
+; HSA: define amdgpu_kernel void @func_kern_defined() #17 {
define amdgpu_kernel void @func_kern_defined() #3 {
call void @defined.func()
ret void
@@ -308,5 +315,6 @@ attributes #3 = { nounwind }
; HSA: attributes #12 = { nounwind "target-cpu"="gfx900" }
; HSA: attributes #13 = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" }
; HSA: attributes #14 = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" }
-; HSA: attributes #15 = { nounwind }
-; HSA: attributes #16 = { nounwind "amdgpu-flat-scratch" }
+; HSA: attributes #15 = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
+; HSA: attributes #16 = { nounwind }
+; HSA: attributes #17 = { nounwind "amdgpu-flat-scratch" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
new file mode 100644
index 00000000000..7edb8533672
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-NOENV %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-OPENCL %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
+
+; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
+; GCN: enable_sgpr_kernarg_segment_ptr = 1
+
+; HSA-NOENV: kernarg_segment_byte_size = 0
+; HSA-OPENCL: kernarg_segment_byte_size = 32
+; MESA: kernarg_segment_byte_size = 16
+
+; HSA: s_load_dword s0, s[4:5], 0x0
+define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
+ %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
+ %load = load volatile i32, i32 addrspace(2)* %cast
+ ret void
+}
+
+; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
+; GCN: enable_sgpr_kernarg_segment_ptr = 1
+
+; HSA-NOENV: kernarg_segment_byte_size = 112
+; HSA-OPENCL: kernarg_segment_byte_size = 144
+; MESA: kernarg_segment_byte_size = 464
+
+; HSA: s_load_dword s0, s[4:5], 0x1c
+define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
+ %implicitarg.ptr = call i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
+ %cast = bitcast i8 addrspace(2)* %implicitarg.ptr to i32 addrspace(2)*
+ %load = load volatile i32, i32 addrspace(2)* %cast
+ ret void
+}
+
+declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() #2
+
+attributes #0 = { nounwind noinline }
+attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind readnone speculatable }
OpenPOWER on IntegriCloud