summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPU.td 7
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 15
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h 5
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h 9
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp 11
-rw-r--r-- llvm/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll 37
7 files changed, 85 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 2964fac4624..70df56112de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -328,6 +328,13 @@ def FeatureDebuggerInsertNops : SubtargetFeature<
"Insert two nop instructions for each high level source statement"
>;
+def FeatureDebuggerReserveTrapRegs : SubtargetFeature<
+ "amdgpu-debugger-reserve-trap-regs",
+ "DebuggerReserveTrapVGPRs",
+ "true",
+ "Reserve VGPRs for trap handler usage"
+>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 843513d23ca..c06e4b57044 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -235,6 +235,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
" bytes/workgroup (compile time only)", false);
+ OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst),
+ false);
+ OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
+ false);
+
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
false);
@@ -472,6 +477,14 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
MaxSGPR += ExtraSGPRs;
+ // Update necessary Reserved* fields and max VGPRs used if
+ // "amdgpu-debugger-reserve-trap-regs" was specified.
+ if (STM.debuggerReserveTrapVGPRs()) {
+ ProgInfo.ReservedVGPRFirst = MaxVGPR + 1;
+ ProgInfo.ReservedVGPRCount = STM.debuggerReserveTrapVGPRCount();
+ MaxVGPR += STM.debuggerReserveTrapVGPRCount();
+ }
+
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
ProgInfo.NumVGPR = MaxVGPR + 1;
@@ -694,6 +707,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
+ header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
+ header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 2c49ff4316b..acadcc0ebf0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -40,6 +40,8 @@ private:
NumVGPR(0),
NumSGPR(0),
FlatUsed(false),
+ ReservedVGPRFirst(0),
+ ReservedVGPRCount(0),
VCCUsed(false),
CodeLen(0) {}
@@ -67,6 +69,9 @@ private:
uint32_t LDSSize;
bool FlatUsed;
+ uint16_t ReservedVGPRFirst;
+ uint16_t ReservedVGPRCount;
+
// Bonus information for debugging.
bool VCCUsed;
uint64_t CodeLen;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 2861d68104e..2d62abd2b88 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -98,7 +98,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
LDSBankCount(0),
IsaVersion(ISAVersion0_0_0),
EnableSIScheduler(false),
- DebuggerInsertNops(false),
+ DebuggerInsertNops(false), DebuggerReserveTrapVGPRs(false),
FrameLowering(nullptr),
GISel(),
InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 645559e2c83..12e6fee7d26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -96,6 +96,7 @@ private:
unsigned IsaVersion;
bool EnableSIScheduler;
bool DebuggerInsertNops;
+ bool DebuggerReserveTrapVGPRs;
std::unique_ptr<AMDGPUFrameLowering> FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -309,6 +310,14 @@ public:
return DebuggerInsertNops;
}
+ bool debuggerReserveTrapVGPRs() const {
+ return DebuggerReserveTrapVGPRs;
+ }
+
+ unsigned debuggerReserveTrapVGPRCount() const {
+ return debuggerReserveTrapVGPRs() ? 4 : 0;
+ }
+
bool dumpCode() const {
return DumpCode;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 8f562b66cfb..1f09500ebf1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -193,6 +193,17 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
}
+ // Reserve VGPRs for trap handler usage if "amdgpu-debugger-reserve-trap-regs"
+ // attribute was specified.
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+ if (ST.debuggerReserveTrapVGPRs()) {
+ for (unsigned i = MaxWorkGroupVGPRCount - ST.debuggerReserveTrapVGPRCount();
+ i < MaxWorkGroupVGPRCount; ++i) {
+ unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
+ }
+
return Reserved;
}
diff --git a/llvm/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll b/llvm/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll
new file mode 100644
index 00000000000..2c857f688af
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/debugger_reserve_trap_regs.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-trap-regs -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK: reserved_vgpr_count = 4
+; CHECK: ReservedVGPRCount: 4
+
+; Function Attrs: nounwind
+define void @debugger_reserve_trap_regs(i32 addrspace(1)* %A) #0 {
+entry:
+ %A.addr = alloca i32 addrspace(1)*, align 4
+ store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
+ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0
+ store i32 1, i32 addrspace(1)* %arrayidx, align 4
+ %1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1
+ store i32 2, i32 addrspace(1)* %arrayidx1, align 4
+ %2 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %2, i32 2
+ store i32 3, i32 addrspace(1)* %arrayidx2, align 4
+ %3 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 4
+ store i32 4, i32 addrspace(1)* %arrayidx3, align 4
+ ret void
+}
+
+attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="fiji" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!opencl.kernels = !{!0}
+!llvm.ident = !{!6}
+
+!0 = !{void (i32 addrspace(1)*)* @debugger_reserve_trap_regs, !1, !2, !3, !4, !5}
+!1 = !{!"kernel_arg_addr_space", i32 1}
+!2 = !{!"kernel_arg_access_qual", !"none"}
+!3 = !{!"kernel_arg_type", !"int*"}
+!4 = !{!"kernel_arg_base_type", !"int*"}
+!5 = !{!"kernel_arg_type_qual", !""}
+!6 = !{!"clang version 3.9.0 (trunk 266639)"}
OpenPOWER on IntegriCloud