diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-25 19:27:18 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-04-25 19:27:18 +0000 |
commit | 48ab526f12eca74e3dcf752c9f98dae3e92f173b (patch) | |
tree | 03ec0a3311d51ad76a6ee47400fba1735e024eb5 | |
parent | 621d3675cbb422283e6e858d778d3c2abfd5b929 (diff) | |
download | bcm5719-llvm-48ab526f12eca74e3dcf752c9f98dae3e92f173b.tar.gz bcm5719-llvm-48ab526f12eca74e3dcf752c9f98dae3e92f173b.zip |
AMDGPU: Add queue ptr intrinsic
llvm-svn: 267451
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll | 19 |
7 files changed, 52 insertions, 3 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 545dec75470..aad5d429f9d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -312,6 +312,10 @@ def int_amdgcn_dispatch_ptr : GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; +def int_amdgcn_queue_ptr : + GCCBuiltin<"__builtin_amdgcn_queue_ptr">, + Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>; + // __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0> def int_amdgcn_interp_p1 : GCCBuiltin<"__builtin_amdgcn_interp_p1">, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 522da3a2c9f..b4b872e0666 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -104,7 +104,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { }; static const StringRef HSAIntrinsicToAttr[][2] = { - { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" } + { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" }, + { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" } }; // TODO: We should not add the attributes if the known compile time workgroup diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 8b4a442fd19..a5d1ec259e0 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -745,6 +745,12 @@ SDValue SITargetLowering::LowerFormalArguments( CCInfo.AllocateReg(DispatchPtrReg); } + if (Info->hasQueuePtr()) { + unsigned QueuePtrReg = Info->addQueuePtr(*TRI); + MF.addLiveIn(QueuePtrReg, &AMDGPU::SReg_64RegClass); + CCInfo.AllocateReg(QueuePtrReg); + } + if (Info->hasKernargSegmentPtr()) { unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI); MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass); @@ -1450,6 +1456,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, switch (IntrinsicID) { case Intrinsic::amdgcn_dispatch_ptr: + case Intrinsic::amdgcn_queue_ptr: { if (!Subtarget->isAmdHsaOS()) { DiagnosticInfoUnsupported BadIntrin( *MF.getFunction(), "unsupported hsa intrinsic without hsa target", @@ -1458,8 +1465,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getUNDEF(VT); } + auto Reg = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ? + SIRegisterInfo::DISPATCH_PTR : SIRegisterInfo::QUEUE_PTR; return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT); + TRI->getPreloadedValue(MF, Reg), VT); + } case Intrinsic::amdgcn_rcp: return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_rsq: diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 43f4b9f7dde..d42c7c1e2ba 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -116,6 +116,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (F->hasFnAttribute("amdgpu-dispatch-ptr")) DispatchPtr = true; + + if (F->hasFnAttribute("amdgpu-queue-ptr")) + QueuePtr = true; } // We don't need to worry about accessing spills with flat instructions. diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index b981f5234b4..8f562b66cfb 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -917,7 +917,8 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF, assert(MFI->hasDispatchPtr()); return MFI->DispatchPtrUserSGPR; case SIRegisterInfo::QUEUE_PTR: - llvm_unreachable("not implemented"); + assert(MFI->hasQueuePtr()); + return MFI->QueuePtrUserSGPR; case SIRegisterInfo::WORKITEM_ID_X: assert(MFI->hasWorkItemIDX()); return AMDGPU::VGPR0; diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll index ffba75d40ab..10dac31fec0 100644 --- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll @@ -9,6 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() #0 declare i32 @llvm.amdgcn.workitem.id.z() #0 declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 +declare i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0 ; HSA: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { @@ -154,6 +155,15 @@ define void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 { ret void } +; HSA: define void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 { +define void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 { + %dispatch.ptr = call i8 addrspace(2)* @llvm.amdgcn.queue.ptr() + %bc = bitcast i8 addrspace(2)* %dispatch.ptr to i32 addrspace(2)* + %val = load i32, i32 addrspace(2)* %bc + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } @@ -168,3 +178,4 @@ attributes #1 = { nounwind } ; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } ; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" } +; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll new file mode 100644 index 00000000000..6bf871543ca --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; ERROR: in function test{{.*}}: unsupported hsa intrinsic without hsa target + +; GCN-LABEL: {{^}}test: +; GCN: enable_sgpr_queue_ptr = 1 +; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 +define void @test(i32 addrspace(1)* %out) { + %queue_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0 + %header_ptr = bitcast i8 addrspace(2)* %queue_ptr to i32 addrspace(2)* + %value = load i32, i32 addrspace(2)* %header_ptr + store i32 %value, i32 addrspace(1)* %out + ret void +} + +declare noalias i8 addrspace(2)* @llvm.amdgcn.queue.ptr() #0 + +attributes #0 = { nounwind readnone } |