summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-07-21 15:45:01 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-07-21 15:45:01 +0000
commitb02094e115fcc94c01b4d274ea24645fd3ea0c58 (patch)
tree83d43c7b53db71fb76e4727e5f04665851821a2e /llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
parent42639a57de19504aaae3c1aad13699e8c4005432 (diff)
downloadbcm5719-llvm-b02094e115fcc94c01b4d274ea24645fd3ea0c58.tar.gz
bcm5719-llvm-b02094e115fcc94c01b4d274ea24645fd3ea0c58.zip
R600/SI: Use scratch memory for large private arrays
llvm-svn: 213551
Diffstat (limited to 'llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp')
-rw-r--r--llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp24
1 files changed, 23 insertions, 1 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 257f72e5ce6..73faaa18358 100644
--- a/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -25,6 +25,7 @@
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
@@ -141,6 +142,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
false);
+ OutStreamer.emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize),
+ false);
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
OutStreamer.emitRawComment(
@@ -332,6 +335,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// Do not clamp NAN to 0.
ProgInfo.DX10Clamp = 0;
+ const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ ProgInfo.ScratchSize = FrameInfo->estimateStackSize(MF);
+
ProgInfo.CodeLen = CodeSize;
}
@@ -361,6 +367,15 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
unsigned LDSBlocks =
RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+ // Scratch is allocated in 256 dword blocks.
+ unsigned ScratchAlignShift = 10;
+ // We need to program the hardware with the amount of scratch memory that
+ // is used by the entire wave. KernelInfo.ScratchSize is the amount of
+ // scratch memory used per thread.
+ unsigned ScratchBlocks =
+ RoundUpToAlignment(KernelInfo.ScratchSize * STM.getWavefrontSize(),
+ 1 << ScratchAlignShift) >> ScratchAlignShift;
+
if (MFI->getShaderType() == ShaderType::COMPUTE) {
OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
@@ -377,7 +392,14 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
- OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
+ const uint32_t ComputePGMRSrc2 =
+ S_00B84C_LDS_SIZE(LDSBlocks) |
+ S_00B02C_SCRATCH_EN(ScratchBlocks > 0);
+
+ OutStreamer.EmitIntValue(ComputePGMRSrc2, 4);
+
+ OutStreamer.EmitIntValue(R_00B860_COMPUTE_TMPRING_SIZE, 4);
+ OutStreamer.EmitIntValue(S_00B860_WAVESIZE(ScratchBlocks), 4);
} else {
OutStreamer.EmitIntValue(RsrcReg, 4);
OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
OpenPOWER on IntegriCloud