summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
diff options
context:
space:
mode:
author: Marek Olsak <marek.olsak@amd.com> 2017-11-09 01:52:17 +0000
committer: Marek Olsak <marek.olsak@amd.com> 2017-11-09 01:52:17 +0000
commit: ffadcb744bc3d8ce0ca516bab09445643c1061a5 (patch)
tree: 793f4419ec97e06c11200049c96cf0139c6eb1ad /llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
parent: 8a94f220aa8f728a87fcc8166838bf1c7315b73e (diff)
download: bcm5719-llvm-ffadcb744bc3d8ce0ca516bab09445643c1061a5.tar.gz
          bcm5719-llvm-ffadcb744bc3d8ce0ca516bab09445643c1061a5.zip
AMDGPU: Fold immediate offset into BUFFER_LOAD_DWORD lowered from SMEM
Summary:
-5.3% code size in affected shaders.

Changed stats only: 48486 shaders in 30489 tests
Totals:
  SGPRS: 2086406 -> 2072430 (-0.67 %)
  VGPRS: 1626872 -> 1627960 (0.07 %)
  Spilled SGPRs: 7865 -> 7912 (0.60 %)
  Code Size: 60978060 -> 60188764 (-1.29 %) bytes
  Max Waves: 374530 -> 374342 (-0.05 %)

Totals from affected shaders:
  SGPRS: 299664 -> 285688 (-4.66 %)
  VGPRS: 233844 -> 234932 (0.47 %)
  Spilled SGPRs: 3959 -> 4006 (1.19 %)
  Code Size: 14905272 -> 14115976 (-5.30 %) bytes
  Max Waves: 46202 -> 46014 (-0.41 %)

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D38915

llvm-svn: 317750
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 34
1 file changed, 32 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 542bfdaffbf..5b851749a80 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3712,13 +3712,43 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
+ auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
+ unsigned Offset = 0;
+
+ // See if we can extract an immediate offset by recognizing one of these:
+ // V_ADD_I32_e32 dst, imm, src1
+ // V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
+ // V_ADD will be removed by "Remove dead machine instructions".
+ if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
+ const MachineOperand *Src =
+ getNamedOperand(*Add, AMDGPU::OpName::src0);
+
+ if (Src && Src->isReg()) {
+ auto Mov = MRI.getUniqueVRegDef(Src->getReg());
+ if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
+ Src = &Mov->getOperand(1);
+ }
+
+ if (Src) {
+ if (Src->isImm())
+ Offset = Src->getImm();
+ else if (Src->isCImm())
+ Offset = Src->getCImm()->getZExtValue();
+ }
+
+ if (Offset && isLegalMUBUFImmOffset(Offset))
+ VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
+ else
+ Offset = 0;
+ }
BuildMI(*MBB, Inst, Inst.getDebugLoc(),
get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
- .add(*getNamedOperand(Inst, AMDGPU::OpName::soff)) // vaddr
+ .add(*VAddr) // vaddr
.add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
.addImm(0) // soffset
- .addImm(0) // offset
+ .addImm(Offset) // offset
.addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
.addImm(0) // slc
.addImm(0) // tfe
OpenPOWER on IntegriCloud