diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-02-12 06:31:30 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-02-12 06:31:30 +0000 |
commit | 296b849163d3f3ed0c342c71c3d5800accda438c (patch) | |
tree | e218a2364e6080456137e4f2f25ed97107c5acdd /llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | |
parent | f71d653879fb5e9cb2c13e060cbb080f6ab2ff4c (diff) | |
download | bcm5719-llvm-296b849163d3f3ed0c342c71c3d5800accda438c.tar.gz bcm5719-llvm-296b849163d3f3ed0c342c71c3d5800accda438c.zip |
AMDGPU: Set flat_scratch from flat_scratch_init reg
This was hardcoded to the static private size, which omits
the offset, as well as the additional size that will be needed
once we have dynamic sizing.
Also stop unconditionally initializing flat_scratch when it is unused.
In the future we should stop emitting this unless flat instructions
are used to access private memory. For example, this currently
initializes it almost always on VI, because flat is used for global access.
llvm-svn: 260658
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 38 |
1 files changed, 3 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 37ba7eef3d6..edcfb0889bb 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -572,43 +572,11 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { AMDGPU::EXEC).addReg(AMDGPU::EXEC); } - // FIXME: This seems inappropriate to do here. if (NeedFlat && MFI->IsKernel) { - // Insert the prologue initializing the SGPRs pointing to the scratch space - // for flat accesses. - const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - // TODO: What to use with function calls? - - // FIXME: This is reporting stack size that is used in a scratch buffer - // rather than registers as well. - uint64_t StackSizeBytes = FrameInfo->getStackSize(); - - int IndirectBegin - = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF); - // Convert register index to 256-byte unit. - uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256); - - assert((StackSizeBytes < 0xffff) && StackOffset < 0xffff && - "Stack limits should be smaller than 16-bits"); - - // Initialize the flat scratch register pair. - // TODO: Can we use one s_mov_b64 here? - - // Offset is in units of 256-bytes. - MachineBasicBlock &MBB = MF.front(); - DebugLoc NoDL; - MachineBasicBlock::iterator Start = MBB.getFirstNonPHI(); - const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32); - - assert(isInt<16>(StackOffset) && isInt<16>(StackSizeBytes)); - - BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO) - .addImm(StackOffset); - - // Documentation says size is "per-thread scratch size in bytes" - BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI) - .addImm(StackSizeBytes); + // We will need to Initialize the flat scratch register pair. + if (NeedFlat) + MFI->setHasFlatInstructions(true); } return true; |