diff options
| author | Marek Olsak <marek.olsak@amd.com> | 2017-11-09 01:52:36 +0000 |
|---|---|---|
| committer | Marek Olsak <marek.olsak@amd.com> | 2017-11-09 01:52:36 +0000 |
| commit | 4c421a2db26753e771ca3676053352516e55e2c7 (patch) | |
| tree | 37dcd6944cdf26690cd6b3cc9db73f9615a67dc0 /llvm/lib/Target | |
| parent | 6a0548acaac4c15c1a17aaaf9f4045aaff127a53 (diff) | |
| download | bcm5719-llvm-4c421a2db26753e771ca3676053352516e55e2c7.tar.gz bcm5719-llvm-4c421a2db26753e771ca3676053352516e55e2c7.zip | |
AMDGPU: Merge BUFFER_LOAD_DWORD_OFFSET into x2, x4
Summary: Only 3 (out of 48486) shaders are affected.
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D38951
llvm-svn: 317753
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 50 |
1 file changed, 37 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 1b34125e6c6..3c1657bd252 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -79,6 +79,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass { DS_READ_WRITE, S_BUFFER_LOAD_IMM, BUFFER_LOAD_OFFEN, + BUFFER_LOAD_OFFSET, }; struct CombineInfo { @@ -112,7 +113,7 @@ private: MachineBasicBlock::iterator mergeRead2Pair(CombineInfo &CI); MachineBasicBlock::iterator mergeWrite2Pair(CombineInfo &CI); MachineBasicBlock::iterator mergeSBufferLoadImmPair(CombineInfo &CI); - MachineBasicBlock::iterator mergeBufferLoadOffenPair(CombineInfo &CI); + MachineBasicBlock::iterator mergeBufferLoadPair(CombineInfo &CI); public: static char ID; @@ -232,7 +233,8 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI) { // SMEM offsets must be consecutive. if (CI.InstClass == S_BUFFER_LOAD_IMM || - CI.InstClass == BUFFER_LOAD_OFFEN) { + CI.InstClass == BUFFER_LOAD_OFFEN || + CI.InstClass == BUFFER_LOAD_OFFSET) { unsigned Diff = CI.IsX2 ? 
2 : 1; return (EltOffset0 + Diff == EltOffset1 || EltOffset1 + Diff == EltOffset0) && @@ -299,6 +301,10 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { AddrOpName[NumAddresses++] = AMDGPU::OpName::vaddr; AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset; break; + case BUFFER_LOAD_OFFSET: + AddrOpName[NumAddresses++] = AMDGPU::OpName::srsrc; + AddrOpName[NumAddresses++] = AMDGPU::OpName::soffset; + break; default: llvm_unreachable("invalid InstClass"); } @@ -399,7 +405,7 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) { } else { CI.GLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::glc)->getImm(); CI.GLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::glc)->getImm(); - if (CI.InstClass == BUFFER_LOAD_OFFEN) { + if (CI.InstClass != S_BUFFER_LOAD_IMM) { CI.SLC0 = TII->getNamedOperand(*CI.I, AMDGPU::OpName::slc)->getImm(); CI.SLC1 = TII->getNamedOperand(*MBBI, AMDGPU::OpName::slc)->getImm(); } @@ -615,21 +621,31 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSBufferLoadImmPair( return Next; } -MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadOffenPair( +MachineBasicBlock::iterator SILoadStoreOptimizer::mergeBufferLoadPair( CombineInfo &CI) { MachineBasicBlock *MBB = CI.I->getParent(); DebugLoc DL = CI.I->getDebugLoc(); - unsigned Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN : - AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN; + unsigned Opcode; + + if (CI.InstClass == BUFFER_LOAD_OFFEN) { + Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN : + AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN; + } else { + Opcode = CI.IsX2 ? AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET : + AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET; + } const TargetRegisterClass *SuperRC = CI.IsX2 ? 
&AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass; unsigned DestReg = MRI->createVirtualRegister(SuperRC); unsigned MergedOffset = std::min(CI.Offset0, CI.Offset1); - BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)) - .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) + auto MIB = BuildMI(*MBB, CI.Paired, DL, TII->get(Opcode), DestReg); + + if (CI.InstClass == BUFFER_LOAD_OFFEN) + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::vaddr)); + + MIB.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::srsrc)) .add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset)) .addImm(MergedOffset) // offset .addImm(CI.GLC0) // glc @@ -724,13 +740,21 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) { continue; } if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN || - Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN) { - CI.InstClass = BUFFER_LOAD_OFFEN; + Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN || + Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFSET || + Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET) { + if (Opc == AMDGPU::BUFFER_LOAD_DWORD_OFFEN || + Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN) + CI.InstClass = BUFFER_LOAD_OFFEN; + else + CI.InstClass = BUFFER_LOAD_OFFSET; + CI.EltSize = 4; - CI.IsX2 = Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN; + CI.IsX2 = Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN || + Opc == AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET; if (findMatchingInst(CI)) { Modified = true; - I = mergeBufferLoadOffenPair(CI); + I = mergeBufferLoadPair(CI); if (!CI.IsX2) CreatedX2++; } else { |

