diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 6 | ++++--
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 3 | ++-
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 | ++++++++++--
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 | +
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 8 | +++++++-
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 8 | ++++++++
6 files changed, 32 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index c68e5861ff2..551737c1d27 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -156,8 +156,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID,
   case Intrinsic::amdgcn_dispatch_id:
     return "amdgpu-dispatch-id";
   case Intrinsic::amdgcn_kernarg_segment_ptr:
-  case Intrinsic::amdgcn_implicitarg_ptr:
     return "amdgpu-kernarg-segment-ptr";
+  case Intrinsic::amdgcn_implicitarg_ptr:
+    return "amdgpu-implicitarg-ptr";
   case Intrinsic::amdgcn_queue_ptr:
   case Intrinsic::trap:
   case Intrinsic::debugtrap:
@@ -190,7 +191,8 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
     { "amdgpu-work-group-id-z" },
     { "amdgpu-dispatch-ptr" },
     { "amdgpu-dispatch-id" },
-    { "amdgpu-kernarg-segment-ptr" }
+    { "amdgpu-kernarg-segment-ptr" },
+    { "amdgpu-implicitarg-ptr" }
   };
 
   if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 389fdc9d636..2737ef9b2ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -764,7 +764,8 @@ public:
     return getGeneration() >= AMDGPUSubtarget::GFX9;
   }
 
-  unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
+  unsigned getKernArgSegmentSize(const MachineFunction &MF,
+                                 unsigned ExplictArgBytes) const;
 
   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 56db67c20f4..9fb1bdb90f0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -899,6 +899,13 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
                      DAG.getConstant(Offset, SL, PtrVT));
 }
 
+SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
+                                            const SDLoc &SL) const {
+  auto MFI = DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+  uint64_t Offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
+  return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
+}
+
 SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                          const SDLoc &SL, SDValue Val,
                                          bool Signed,
@@ -3029,8 +3036,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                 TRI->getPreloadedValue(MF, Reg), VT);
   }
   case Intrinsic::amdgcn_implicitarg_ptr: {
-    unsigned offset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
-    return lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), offset);
+    if (MFI->isEntryFunction())
+      return getImplicitArgPtr(DAG, DL);
+    report_fatal_error("amdgcn.implicitarg.ptr not implemented for functions");
   }
   case Intrinsic::amdgcn_kernarg_segment_ptr: {
     unsigned Reg
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 30482dc6a0a..b703cedf743 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -23,6 +23,7 @@ namespace llvm {
 class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
                                    SDValue Chain, uint64_t Offset) const;
+  SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
   SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
                                    const SDLoc &SL, SDValue Chain,
                                    uint64_t Offset, bool Signed,
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a7c8166ff6d..04e57bedb21 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -93,11 +93,17 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
 
     // FIXME: Not really a system SGPR.
     PrivateSegmentWaveByteOffsetSystemSGPR = ScratchWaveOffsetReg;
+    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+      ImplicitArgPtr = true;
+  } else {
+    if (F->hasFnAttribute("amdgpu-implicitarg-ptr"))
+      KernargSegmentPtr = true;
   }
 
   CallingConv::ID CC = F->getCallingConv();
   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
-    KernargSegmentPtr = !F->arg_empty();
+    if (!F->arg_empty())
+      KernargSegmentPtr = true;
     WorkGroupIDX = true;
     WorkItemIDX = true;
   } else if (CC == CallingConv::AMDGPU_PS) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 4c7f38a09a4..8511403ebc3 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -186,6 +186,10 @@ private:
   // Other shaders indirect 64-bits at sgpr[0:1]
   bool ImplicitBufferPtr : 1;
 
+  // Pointer to where the ABI inserts special kernel arguments separate from the
+  // user arguments. This is an offset from the KernargSegmentPtr.
+  bool ImplicitArgPtr : 1;
+
   MCPhysReg getNextUserSGPR() const {
     assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
     return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -346,6 +350,10 @@ public:
     return WorkItemIDZ;
   }
 
+  bool hasImplicitArgPtr() const {
+    return ImplicitArgPtr;
+  }
+
   bool hasImplicitBufferPtr() const {
     return ImplicitBufferPtr;
   }