diff options
5 files changed, 43 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 9aaa31c29fe..ee7ad3293d9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -37,6 +37,16 @@ using namespace llvm; +static unsigned findFirstFreeSGPR(CCState &CCInfo) { + unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); + for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) { + if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) { + return AMDGPU::SGPR0 + Reg; + } + } + llvm_unreachable("Cannot allocate sgpr"); +} + SITargetLowering::SITargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI) : AMDGPUTargetLowering(TM, STI) { @@ -712,6 +722,15 @@ SDValue SITargetLowering::LowerFormalArguments( if (!AMDGPU::isShader(CallConv)) { getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins, Splits); + + assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()); + } else { + assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() && + !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() && + !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() && + !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() && + !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() && + !Info->hasWorkItemIDZ()); } // FIXME: How should these inputs interact with inreg / custom SGPR inputs? @@ -834,8 +853,7 @@ SDValue SITargetLowering::LowerFormalArguments( unsigned Reg = Info->addWorkGroupIDX(); MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass); CCInfo.AllocateReg(Reg); - } else - llvm_unreachable("work group id x is always enabled"); + } if (Info->hasWorkGroupIDY()) { unsigned Reg = Info->addWorkGroupIDY(); @@ -857,8 +875,13 @@ SDValue SITargetLowering::LowerFormalArguments( if (Info->hasPrivateSegmentWaveByteOffset()) { // Scratch wave offset passed in system SGPR. - unsigned PrivateSegmentWaveByteOffsetReg - = Info->addPrivateSegmentWaveByteOffset(); + unsigned PrivateSegmentWaveByteOffsetReg; + + if (AMDGPU::isShader(CallConv)) { + PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); + Info->setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); + } else + PrivateSegmentWaveByteOffsetReg = Info->addPrivateSegmentWaveByteOffset(); MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass); CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg); @@ -923,8 +946,7 @@ SDValue SITargetLowering::LowerFormalArguments( unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X); MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); CCInfo.AllocateReg(Reg); - } else - llvm_unreachable("workitem id x should always be enabled"); + } if (Info->hasWorkItemIDY()) { unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index c56286e43ef..ef413cb1d1a 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -65,12 +65,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) GridWorkgroupCountX(false), GridWorkgroupCountY(false), GridWorkgroupCountZ(false), - WorkGroupIDX(true), + WorkGroupIDX(false), WorkGroupIDY(false), WorkGroupIDZ(false), WorkGroupInfo(false), PrivateSegmentWaveByteOffset(false), - WorkItemIDX(true), + WorkItemIDX(false), WorkItemIDY(false), WorkItemIDZ(false) { const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>(); @@ -80,8 +80,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - if (!AMDGPU::isShader(F->getCallingConv())) + if (!AMDGPU::isShader(F->getCallingConv())) { KernargSegmentPtr = true; + WorkGroupIDX = true; + WorkItemIDX = true; + } if (F->hasFnAttribute("amdgpu-work-group-id-y")) WorkGroupIDY = true; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 6de944a61d8..ac3497c31d1 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -164,6 +164,10 @@ public: return PrivateSegmentWaveByteOffsetSystemSGPR; } + void setPrivateSegmentWaveByteOffset(unsigned Reg) { + PrivateSegmentWaveByteOffsetSystemSGPR = Reg; + } + bool hasPrivateSegmentBuffer() const { return PrivateSegmentBuffer; } diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll index cec16250315..eb704c3b5f7 100644 --- a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll +++ b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll @@ -8,8 +8,8 @@ ; CI: s_mov_b32 s11, 0x98f000 ; VI: s_mov_b32 s11, 0x980000 -; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen -; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen +; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen ; ALL: ; ScratchSize: 32772 define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 { @@ -29,8 +29,8 @@ define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 { ; CI: s_mov_b32 s11, 0x98f000 ; VI: s_mov_b32 s11, 0x980000 -; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen -; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen +; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen ; ALL: ; ScratchSize: 32772 define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll index 28a86df9615..b755b786501 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -11,6 +11,7 @@ ; GCN-LABEL: {{^}}main: +; GCN: s_mov_b32 s11, s12 ; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; GCN-NEXT: s_mov_b32 s14, -1 |