summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 13
-rw-r--r-- llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll 18
2 files changed, 31 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d64e6555d03..9cb9045ca76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -837,6 +837,19 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ProgInfo.NumSGPR += ExtraSGPRs;
ProgInfo.NumVGPR += ExtraVGPRs;
+ // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
+ // dispatch registers are function args.
+ unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
+ for (auto &Arg : MF.getFunction().args()) {
+ unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
+ if (Arg.hasAttribute(Attribute::InReg))
+ WaveDispatchNumSGPR += NumRegs;
+ else
+ WaveDispatchNumVGPR += NumRegs;
+ }
+ ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
+ ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
+
// Adjust number of registers used to meet default/requested minimum/maximum
// number of waves per execution unit request.
ProgInfo.NumSGPRsForWavesPerEU = std::max(
diff --git a/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll b/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
new file mode 100644
index 00000000000..06174f8b858
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wave_dispatch_regs.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s
+; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s
+
+; This compute shader has input args that claim that it has 17 sgprs and 5 vgprs
+; in wave dispatch. Ensure that the sgpr and vgpr counts in COMPUTE_PGM_RSRC1
+; are set to reflect that, even though the registers are not used in the shader.
+
+; GCN-LABEL: {{^}}_amdgpu_cs_main:
+; SI: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}81,
+; VI: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}c1,
+; GFX9: .amd_amdgpu_pal_metadata{{.*}}0x2e12,0x{{[0-9a-f]*}}81,
+
+define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg, i32 inreg, <2 x i32> inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, <3 x i32> inreg, i32 inreg, <5 x i32>) {
+.entry:
+ ret void
+}
+
OpenPOWER on IntegriCloud