path: root/llvm/lib/Target/AMDGPU
author    Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>    2016-06-25 03:11:28 +0000
committer Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>    2016-06-25 03:11:28 +0000
commit    f2f3d14774e6019048dccd1ecbca340ae5bbfca1 (patch)
tree      65f692e2c2223031bede6426b5f4d1e6646d5655 /llvm/lib/Target/AMDGPU
parent    92d33bd2af3452b7272f8d27a75c0e344ae20f38 (diff)
[AMDGPU] Emit debugger prologue and emit the rest of the debugger fields in the kernel code header
Debugger prologue is emitted if -mattr=+amdgpu-debugger-emit-prologue.

Debugger prologue writes work group IDs and work item IDs to scratch memory at a fixed location in the following format:
- offset 0:  work group ID x
- offset 4:  work group ID y
- offset 8:  work group ID z
- offset 16: work item ID x
- offset 20: work item ID y
- offset 24: work item ID z

Set:
- amd_kernel_code_t::debug_wavefront_private_segment_offset_sgpr to the scratch wave offset register
- amd_kernel_code_t::debug_private_segment_buffer_sgpr to the scratch rsrc register
- amd_kernel_code_t::is_debug_supported to true if all debugger features are enabled

Differential Revision: http://reviews.llvm.org/D20335

llvm-svn: 273769
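For orientation, the fixed scratch layout above can be restated as a tiny stand-alone sketch. The names DebuggerIDKind and debuggerScratchOffset below are illustrative only and do not appear in the patch; the patch itself hard-codes the same offsets as i * 4 and i * 4 + 16 in SITargetLowering::createDebuggerPrologueStackObjects further down.

// Hypothetical helper (not part of this commit) mirroring the debugger
// prologue's scratch layout: work group IDs occupy 4-byte slots at offsets
// 0/4/8, work item IDs at offsets 16/20/24 (offset 12 is left unused).
enum class DebuggerIDKind { WorkGroupID, WorkItemID };

constexpr unsigned debuggerScratchOffset(DebuggerIDKind Kind, unsigned Dim) {
  // Dim is 0 for x, 1 for y, 2 for z.
  return (Kind == DebuggerIDKind::WorkGroupID ? 0u : 16u) + Dim * 4u;
}

static_assert(debuggerScratchOffset(DebuggerIDKind::WorkGroupID, 2) == 8,
              "work group ID z is written at offset 8");
static_assert(debuggerScratchOffset(DebuggerIDKind::WorkItemID, 1) == 20,
              "work item ID y is written at offset 20");

The two static_asserts simply restate entries from the offset list above.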
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td                 |  7
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp      | 27
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h        | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp       |  1
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h         | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp       | 50
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.h         |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp        | 31
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h          |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 10
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h   | 60
11 files changed, 207 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index dfd65fadf64..7736fd6c4cf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -329,6 +329,13 @@ def FeatureDebuggerReserveRegs : SubtargetFeature<
"Reserve registers for debugger usage"
>;
+def FeatureDebuggerEmitPrologue : SubtargetFeature<
+ "amdgpu-debugger-emit-prologue",
+ "DebuggerEmitPrologue",
+ "true",
+ "Emit debugger prologue"
+>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index b18c97acd72..c7784d7a76c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -200,6 +200,13 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
false);
+ if (MF.getSubtarget<SISubtarget>().debuggerEmitPrologue()) {
+ OutStreamer->emitRawComment(" DebuggerWavefrontPrivateSegmentOffsetSGPR: s" +
+ Twine(KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR), false);
+ OutStreamer->emitRawComment(" DebuggerPrivateSegmentBufferSGPR: s" +
+ Twine(KernelInfo.DebuggerPrivateSegmentBufferSGPR), false);
+ }
+
OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)),
false);
@@ -444,6 +451,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
MaxVGPR += MFI->getDebuggerReservedVGPRCount();
}
+ // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
+ // DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
+ // attribute was specified.
+ if (STM.debuggerEmitPrologue()) {
+ ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
+ RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
+ ProgInfo.DebuggerPrivateSegmentBufferSGPR =
+ RI->getHWRegIndex(MFI->getScratchRSrcReg());
+ }
+
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
ProgInfo.NumVGPR = MaxVGPR + 1;
@@ -670,6 +687,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
if (MFI->hasDispatchPtr())
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
+ if (STM.debuggerSupported())
+ header.code_properties |= AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED;
+
if (STM.isXNACKEnabled())
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
@@ -681,6 +701,13 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
+ if (STM.debuggerEmitPrologue()) {
+ header.debug_wavefront_private_segment_offset_sgpr =
+ KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
+ header.debug_private_segment_buffer_sgpr =
+ KernelInfo.DebuggerPrivateSegmentBufferSGPR;
+ }
+
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 87f953fce27..2d44ffefd80 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -42,6 +42,8 @@ private:
FlatUsed(false),
ReservedVGPRFirst(0),
ReservedVGPRCount(0),
+ DebuggerWavefrontPrivateSegmentOffsetSGPR((uint16_t)-1),
+ DebuggerPrivateSegmentBufferSGPR((uint16_t)-1),
VCCUsed(false),
CodeLen(0) {}
@@ -75,6 +77,14 @@ private:
// The number of consecutive VGPRs reserved.
uint16_t ReservedVGPRCount;
+ // Fixed SGPR number used to hold wave scratch offset for entire kernel
+ // execution, or uint16_t(-1) if the register is not used or not known.
+ uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR;
+ // Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
+ // kernel execution, or uint16_t(-1) if the register is not used or not
+ // known.
+ uint16_t DebuggerPrivateSegmentBufferSGPR;
+
// Bonus information for debugging.
bool VCCUsed;
uint64_t CodeLen;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index c6ac84bd3a3..e973f8e4837 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -101,6 +101,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
EnableXNACK(false),
DebuggerInsertNops(false),
DebuggerReserveRegs(false),
+ DebuggerEmitPrologue(false),
EnableVGPRSpilling(false),
EnablePromoteAlloca(false),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 3c1bb5c07f0..53117e3cb60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -77,6 +77,7 @@ protected:
bool EnableXNACK;
bool DebuggerInsertNops;
bool DebuggerReserveRegs;
+ bool DebuggerEmitPrologue;
// Used as options.
bool EnableVGPRSpilling;
@@ -402,6 +403,11 @@ public:
return EnableSIScheduler;
}
+ bool debuggerSupported() const {
+ return debuggerInsertNops() && debuggerReserveRegs() &&
+ debuggerEmitPrologue();
+ }
+
bool debuggerInsertNops() const {
return DebuggerInsertNops;
}
@@ -410,6 +416,10 @@ public:
return DebuggerReserveRegs;
}
+ bool debuggerEmitPrologue() const {
+ return DebuggerEmitPrologue;
+ }
+
bool loadStoreOptEnabled() const {
return EnableLoadStoreOpt;
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 1f3b361175e..03b11f0fd38 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -39,6 +39,12 @@ static ArrayRef<MCPhysReg> getAllSGPRs() {
void SIFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
+ // specified.
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ if (ST.debuggerEmitPrologue())
+ emitDebuggerPrologue(MF, MBB);
+
if (!MF.getFrameInfo()->hasStackObjects())
return;
@@ -54,7 +60,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
return;
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -87,6 +92,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// pointer. Because we only detect if flat instructions are used at all,
// this will be used more often than necessary on VI.
+ // Debug location must be unknown since the first debug location is used to
+ // determine the end of the prologue.
DebugLoc DL;
unsigned FlatScratchInitReg
@@ -289,3 +296,44 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
RS->addScavengingFrameIndex(ScavengeFI);
}
}
+
+void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ MachineBasicBlock::iterator I = MBB.begin();
+ DebugLoc DL;
+
+ // For each dimension:
+ for (unsigned i = 0; i < 3; ++i) {
+ // Get work group ID SGPR, and make it live-in again.
+ unsigned WorkGroupIDSGPR = MFI->getWorkGroupIDSGPR(i);
+ MF.getRegInfo().addLiveIn(WorkGroupIDSGPR);
+ MBB.addLiveIn(WorkGroupIDSGPR);
+
+ // Since SGPRs are spilled into VGPRs, copy work group ID SGPR to VGPR in
+ // order to spill it to scratch.
+ unsigned WorkGroupIDVGPR =
+ MF.getRegInfo().createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), WorkGroupIDVGPR)
+ .addReg(WorkGroupIDSGPR);
+
+ // Spill work group ID.
+ int WorkGroupIDObjectIdx = MFI->getDebuggerWorkGroupIDStackObjectIndex(i);
+ TII->storeRegToStackSlot(MBB, I, WorkGroupIDVGPR, false,
+ WorkGroupIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
+
+ // Get work item ID VGPR, and make it live-in again.
+ unsigned WorkItemIDVGPR = MFI->getWorkItemIDVGPR(i);
+ MF.getRegInfo().addLiveIn(WorkItemIDVGPR);
+ MBB.addLiveIn(WorkItemIDVGPR);
+
+ // Spill work item ID.
+ int WorkItemIDObjectIdx = MFI->getDebuggerWorkItemIDStackObjectIndex(i);
+ TII->storeRegToStackSlot(MBB, I, WorkItemIDVGPR, false,
+ WorkItemIDObjectIdx, &AMDGPU::VGPR_32RegClass, TRI);
+ }
+}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index c2e7a710817..37417d098f3 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -29,6 +29,10 @@ public:
void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
+
+private:
+ /// \brief Emits debugger prologue.
+ void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
};
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3142e4c8f42..ad02c4113ca 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -596,6 +596,11 @@ SDValue SITargetLowering::LowerFormalArguments(
return DAG.getEntryNode();
}
+ // Create stack objects that are used for emitting debugger prologue if
+ // "amdgpu-debugger-emit-prologue" attribute was specified.
+ if (ST.debuggerEmitPrologue())
+ createDebuggerPrologueStackObjects(MF);
+
SmallVector<ISD::InputArg, 16> Splits;
BitVector Skipped(Ins.size());
@@ -1258,6 +1263,32 @@ bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
}
}
+void SITargetLowering::createDebuggerPrologueStackObjects(
+ MachineFunction &MF) const {
+ // Create stack objects that are used for emitting debugger prologue.
+ //
+ // Debugger prologue writes work group IDs and work item IDs to scratch memory
+ // at fixed location in the following format:
+ // offset 0: work group ID x
+ // offset 4: work group ID y
+ // offset 8: work group ID z
+ // offset 16: work item ID x
+ // offset 20: work item ID y
+ // offset 24: work item ID z
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ int ObjectIdx = 0;
+
+ // For each dimension:
+ for (unsigned i = 0; i < 3; ++i) {
+ // Create fixed stack object for work group ID.
+ ObjectIdx = MF.getFrameInfo()->CreateFixedObject(4, i * 4, true);
+ Info->setDebuggerWorkGroupIDStackObjectIndex(i, ObjectIdx);
+ // Create fixed stack object for work item ID.
+ ObjectIdx = MF.getFrameInfo()->CreateFixedObject(4, i * 4 + 16, true);
+ Info->setDebuggerWorkItemIDStackObjectIndex(i, ObjectIdx);
+ }
+}
+
/// This transforms the control flow intrinsics to get the branch destination as
/// last parameter, also switches branch target with BR if the need arise
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 032372b7b17..3806e95dff3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -70,6 +70,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
bool isCFIntrinsic(const SDNode *Intr) const;
+
+ void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
public:
SITargetLowering(const TargetMachine &tm, const SISubtarget &STI);
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6daebbaa0e4..0904d7dc8ab 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -54,6 +54,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ReturnsVoid(true),
MaximumWorkGroupSize(0),
DebuggerReservedVGPRCount(0),
+ DebuggerWorkGroupIDStackObjectIndices{0, 0, 0},
+ DebuggerWorkItemIDStackObjectIndices{0, 0, 0},
LDSWaveSpillSize(0),
PSInputEna(0),
NumUserSGPRs(0),
@@ -92,16 +94,16 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDX = true;
}
- if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
WorkGroupIDY = true;
- if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
WorkGroupIDZ = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
WorkItemIDY = true;
- if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
WorkItemIDZ = true;
// X, XY, and XYZ are the only supported combinations, so make sure Y is
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index ee2f722aba5..0ad25f0cae4 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -64,6 +64,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// Number of reserved VGPRs for debugger usage.
unsigned DebuggerReservedVGPRCount;
+ // Stack object indices for work group IDs.
+ int DebuggerWorkGroupIDStackObjectIndices[3];
+ // Stack object indices for work item IDs.
+ int DebuggerWorkItemIDStackObjectIndices[3];
public:
// FIXME: Make private
@@ -334,6 +338,62 @@ public:
return DebuggerReservedVGPRCount;
}
+ /// \returns Stack object index for \p Dim's work group ID.
+ int getDebuggerWorkGroupIDStackObjectIndex(unsigned Dim) const {
+ assert(Dim < 3);
+ return DebuggerWorkGroupIDStackObjectIndices[Dim];
+ }
+
+ /// \brief Sets stack object index for \p Dim's work group ID to \p ObjectIdx.
+ void setDebuggerWorkGroupIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
+ assert(Dim < 3);
+ DebuggerWorkGroupIDStackObjectIndices[Dim] = ObjectIdx;
+ }
+
+ /// \returns Stack object index for \p Dim's work item ID.
+ int getDebuggerWorkItemIDStackObjectIndex(unsigned Dim) const {
+ assert(Dim < 3);
+ return DebuggerWorkItemIDStackObjectIndices[Dim];
+ }
+
+ /// \brief Sets stack object index for \p Dim's work item ID to \p ObjectIdx.
+ void setDebuggerWorkItemIDStackObjectIndex(unsigned Dim, int ObjectIdx) {
+ assert(Dim < 3);
+ DebuggerWorkItemIDStackObjectIndices[Dim] = ObjectIdx;
+ }
+
+ /// \returns SGPR used for \p Dim's work group ID.
+ unsigned getWorkGroupIDSGPR(unsigned Dim) const {
+ switch (Dim) {
+ case 0:
+ assert(hasWorkGroupIDX());
+ return WorkGroupIDXSystemSGPR;
+ case 1:
+ assert(hasWorkGroupIDY());
+ return WorkGroupIDYSystemSGPR;
+ case 2:
+ assert(hasWorkGroupIDZ());
+ return WorkGroupIDZSystemSGPR;
+ }
+ llvm_unreachable("unexpected dimension");
+ }
+
+ /// \returns VGPR used for \p Dim's work item ID.
+ unsigned getWorkItemIDVGPR(unsigned Dim) const {
+ switch (Dim) {
+ case 0:
+ assert(hasWorkItemIDX());
+ return AMDGPU::VGPR0;
+ case 1:
+ assert(hasWorkItemIDY());
+ return AMDGPU::VGPR1;
+ case 2:
+ assert(hasWorkItemIDZ());
+ return AMDGPU::VGPR2;
+ }
+ llvm_unreachable("unexpected dimension");
+ }
+
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};