diff options
| author | Marek Olsak <marek.olsak@amd.com> | 2017-11-09 01:52:17 +0000 |
|---|---|---|
| committer | Marek Olsak <marek.olsak@amd.com> | 2017-11-09 01:52:17 +0000 |
| commit | ffadcb744bc3d8ce0ca516bab09445643c1061a5 (patch) | |
| tree | 793f4419ec97e06c11200049c96cf0139c6eb1ad /llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | |
| parent | 8a94f220aa8f728a87fcc8166838bf1c7315b73e (diff) | |
| download | bcm5719-llvm-ffadcb744bc3d8ce0ca516bab09445643c1061a5.tar.gz bcm5719-llvm-ffadcb744bc3d8ce0ca516bab09445643c1061a5.zip | |
AMDGPU: Fold immediate offset into BUFFER_LOAD_DWORD lowered from SMEM
Summary:
-5.3% code size in affected shaders.
Changed stats only:
48486 shaders in 30489 tests
Totals:
SGPRS: 2086406 -> 2072430 (-0.67 %)
VGPRS: 1626872 -> 1627960 (0.07 %)
Spilled SGPRs: 7865 -> 7912 (0.60 %)
Code Size: 60978060 -> 60188764 (-1.29 %) bytes
Max Waves: 374530 -> 374342 (-0.05 %)
Totals from affected shaders:
SGPRS: 299664 -> 285688 (-4.66 %)
VGPRS: 233844 -> 234932 (0.47 %)
Spilled SGPRs: 3959 -> 4006 (1.19 %)
Code Size: 14905272 -> 14115976 (-5.30 %) bytes
Max Waves: 46202 -> 46014 (-0.41 %)
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D38915
llvm-svn: 317750
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 34 |
1 file changed, 32 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 542bfdaffbf..5b851749a80 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3712,13 +3712,43 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
 
     case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
       unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
+      auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
+      unsigned Offset = 0;
+
+      // See if we can extract an immediate offset by recognizing one of these:
+      //   V_ADD_I32_e32 dst, imm, src1
+      //   V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
+      // V_ADD will be removed by "Remove dead machine instructions".
+      if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
+        const MachineOperand *Src =
+          getNamedOperand(*Add, AMDGPU::OpName::src0);
+
+        if (Src && Src->isReg()) {
+          auto Mov = MRI.getUniqueVRegDef(Src->getReg());
+          if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
+            Src = &Mov->getOperand(1);
+        }
+
+        if (Src) {
+          if (Src->isImm())
+            Offset = Src->getImm();
+          else if (Src->isCImm())
+            Offset = Src->getCImm()->getZExtValue();
+        }
+
+        if (Offset && isLegalMUBUFImmOffset(Offset))
+          VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
+        else
+          Offset = 0;
+      }
 
       BuildMI(*MBB, Inst, Inst.getDebugLoc(),
               get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
-        .add(*getNamedOperand(Inst, AMDGPU::OpName::soff)) // vaddr
+        .add(*VAddr) // vaddr
         .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
         .addImm(0) // soffset
-        .addImm(0) // offset
+        .addImm(Offset) // offset
         .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
         .addImm(0) // slc
         .addImm(0) // tfe

