summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 34
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp 9
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll 1
5 files changed, 43 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9aaa31c29fe..ee7ad3293d9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -37,6 +37,16 @@
using namespace llvm;
+/// Return the first register at or after SGPR0 that CCInfo has not allocated.
+static unsigned findFirstFreeSGPR(CCState &CCInfo) {
+  const unsigned Limit = AMDGPU::SGPR0 + AMDGPU::SGPR_32RegClass.getNumRegs();
+  for (unsigned Candidate = AMDGPU::SGPR0; Candidate != Limit; ++Candidate)
+    if (!CCInfo.isAllocated(Candidate))
+      return Candidate;
+
+  llvm_unreachable("Cannot allocate sgpr");
+}
+
SITargetLowering::SITargetLowering(TargetMachine &TM,
const AMDGPUSubtarget &STI)
: AMDGPUTargetLowering(TM, STI) {
@@ -712,6 +722,15 @@ SDValue SITargetLowering::LowerFormalArguments(
if (!AMDGPU::isShader(CallConv)) {
getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
Splits);
+
+ assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
+ } else {
+ assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
+ !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
+ !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
+ !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
+ !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
+ !Info->hasWorkItemIDZ());
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
@@ -834,8 +853,7 @@ SDValue SITargetLowering::LowerFormalArguments(
unsigned Reg = Info->addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
CCInfo.AllocateReg(Reg);
- } else
- llvm_unreachable("work group id x is always enabled");
+ }
if (Info->hasWorkGroupIDY()) {
unsigned Reg = Info->addWorkGroupIDY();
@@ -857,8 +875,13 @@ SDValue SITargetLowering::LowerFormalArguments(
if (Info->hasPrivateSegmentWaveByteOffset()) {
// Scratch wave offset passed in system SGPR.
- unsigned PrivateSegmentWaveByteOffsetReg
- = Info->addPrivateSegmentWaveByteOffset();
+ unsigned PrivateSegmentWaveByteOffsetReg;
+
+ if (AMDGPU::isShader(CallConv)) {
+ PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
+ Info->setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+ } else
+ PrivateSegmentWaveByteOffsetReg = Info->addPrivateSegmentWaveByteOffset();
MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
@@ -923,8 +946,7 @@ SDValue SITargetLowering::LowerFormalArguments(
unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
CCInfo.AllocateReg(Reg);
- } else
- llvm_unreachable("workitem id x should always be enabled");
+ }
if (Info->hasWorkItemIDY()) {
unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c56286e43ef..ef413cb1d1a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -65,12 +65,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
GridWorkgroupCountX(false),
GridWorkgroupCountY(false),
GridWorkgroupCountZ(false),
- WorkGroupIDX(true),
+ WorkGroupIDX(false),
WorkGroupIDY(false),
WorkGroupIDZ(false),
WorkGroupInfo(false),
PrivateSegmentWaveByteOffset(false),
- WorkItemIDX(true),
+ WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false) {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
@@ -80,8 +80,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- if (!AMDGPU::isShader(F->getCallingConv()))
+ if (!AMDGPU::isShader(F->getCallingConv())) {
KernargSegmentPtr = true;
+ WorkGroupIDX = true;
+ WorkItemIDX = true;
+ }
if (F->hasFnAttribute("amdgpu-work-group-id-y"))
WorkGroupIDY = true;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 6de944a61d8..ac3497c31d1 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -164,6 +164,10 @@ public:
return PrivateSegmentWaveByteOffsetSystemSGPR;
}
+  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
+    this->PrivateSegmentWaveByteOffsetSystemSGPR = Reg; // chosen by caller
+  }
+
bool hasPrivateSegmentBuffer() const {
return PrivateSegmentBuffer;
}
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
index cec16250315..eb704c3b5f7 100644
--- a/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll
@@ -8,8 +8,8 @@
; CI: s_mov_b32 s11, 0x98f000
; VI: s_mov_b32 s11, 0x980000
-; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
@@ -29,8 +29,8 @@ define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
; CI: s_mov_b32 s11, 0x98f000
; VI: s_mov_b32 s11, 0x980000
-; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index 28a86df9615..b755b786501 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -11,6 +11,7 @@
; GCN-LABEL: {{^}}main:
+; GCN: s_mov_b32 s11, s12
; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s14, -1
OpenPOWER on IntegriCloud