| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-04 17:12:57 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-09-04 17:12:57 +0000 |
| commit | 84489b34f6f147b4de6127441f3051e0d0e08364 (patch) | |
| tree | cadb66b916c27d26472ae6c527052ca5bc3a9b4b | /llvm/lib/Target |
| parent | e6b26f2f91a3b80d6ee726ee1b6147d72252cc55 (diff) | |
AMDGPU: Handle frame index expansion with no free SGPRs pre gfx9
Pre-gfx9, the VALU add used for frame index expansion must define a
carry-out even though it is unused, so the expansion needs an extra
SGPR pair for that carry. This can be avoided by keeping the entire
offset computation in SGPRs. If one scratch SGPR is still available,
this only costs one extra mov; if none are available, the entire
computation can be done in place in the register holding the frame
difference and then reversed.
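To see why the in-place form can be reversed, note that the right shift
drops only zero bits as long as the register being scaled holds a multiple
of the wavefront size, which the undo sequence relies on. A minimal
standalone sketch of that round trip (illustrative names only, not part of
the patch):

#include <cassert>
#include <cstdint>

// Models the no-free-SGPR path: scale and offset the delta in place, hand
// the result to the use, then reverse both steps so the register ends up
// holding its original value again.
uint32_t expandAndRestore(uint32_t &diff, uint32_t offset, unsigned waveLog2) {
  assert((diff & ((1u << waveLog2) - 1u)) == 0 && "delta must be wave-aligned");
  const uint32_t saved = diff;
  diff >>= waveLog2;            // S_LSHR_B32: scale the delta in place
  diff += offset;               // S_ADD_U32:  fold in the frame offset
  const uint32_t result = diff; // COPY:       value consumed by the VGPR use
  diff -= offset;               // S_SUB_U32:  undo the add
  diff <<= waveLog2;            // S_LSHL_B32: undo the scale
  assert(diff == saved);        // the register is unchanged afterwards
  return result;
}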
This still assumes the use is a VGPR operand, as the existing code did,
and we currently only select frame indexes to VALU instructions. This
should eventually be fixed to handle more possible MIR.
llvm-svn: 370929
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp    |  2 |
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 82 |
2 files changed, 58 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7b7c34ed8a2..2a3a1b34094 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6098,7 +6098,7 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
   Register UnusedCarry = RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
   // TODO: Users need to deal with this.
   if (!UnusedCarry.isValid())
-    report_fatal_error("failed to scavenge unused carry-out SGPR");
+    return MachineInstrBuilder();
 
   return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg)
          .addReg(UnusedCarry, RegState::Define | RegState::Dead);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 29f50503ad5..939ea033b20 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1273,35 +1273,67 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
       if (Offset == 0) {
         // XXX - This never happens because of emergency scavenging slot at 0?
         BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ResultReg)
-          .addImm(Log2_32(ST.getWavefrontSize()))
+          .addImm(ST.getWavefrontSizeLog2())
           .addReg(DiffReg);
       } else {
-        Register ScaledReg =
-          RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
-        // FIXME: Assusmed VGPR use.
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg)
-          .addImm(Log2_32(ST.getWavefrontSize()))
-          .addReg(DiffReg, RegState::Kill);
-
-        // TODO: Fold if use instruction is another add of a constant.
-        if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
-
-          // FIXME: This can fail
-          TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)
-            .addImm(Offset)
-            .addReg(ScaledReg, RegState::Kill)
-            .addImm(0); // clamp bit
+        if (auto MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) {
+          Register ScaledReg =
+            RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MIB, 0);
+
+          BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
+                  ScaledReg)
+            .addImm(ST.getWavefrontSizeLog2())
+            .addReg(DiffReg, RegState::Kill);
+
+          // TODO: Fold if use instruction is another add of a constant.
+          if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
+            // FIXME: This can fail
+            MIB.addImm(Offset);
+            MIB.addReg(ScaledReg, RegState::Kill);
+            MIB.addImm(0); // clamp bit
+          } else {
+            Register ConstOffsetReg =
+              RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MIB, 0, false);
+
+            // This should always be able to use the unused carry out.
+            assert(ConstOffsetReg && "this scavenge should not be able to fail");
+
+            BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
+              .addImm(Offset);
+            MIB.addReg(ConstOffsetReg, RegState::Kill);
+            MIB.addReg(ScaledReg, RegState::Kill);
+            MIB.addImm(0); // clamp bit
+          }
         } else {
-          Register ConstOffsetReg =
-            RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
-
-          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
-            .addImm(Offset);
-          TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)
-            .addReg(ConstOffsetReg, RegState::Kill)
+          // We have to produce a carry out, and we there isn't a free SGPR
+          // pair for it. We can keep the whole computation on the SALU to
+          // avoid clobbering an additional register at the cost of an extra
+          // mov.
+
+          // We may have 1 free scratch SGPR even though a carry out is
+          // unavailable. Only one additional mov is needed.
+          Register TmpScaledReg =
+            RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
+          Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : DiffReg;
+
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
+            .addReg(DiffReg, RegState::Kill)
+            .addImm(ST.getWavefrontSizeLog2());
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), ScaledReg)
             .addReg(ScaledReg, RegState::Kill)
-            .addImm(0); // clamp bit
+            .addImm(Offset);
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
+            .addReg(ScaledReg, RegState::Kill);
+
+          // If there were truly no free SGPRs, we need to undo everything.
+          if (!TmpScaledReg.isValid()) {
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScaledReg)
+              .addReg(ScaledReg, RegState::Kill)
+              .addImm(Offset);
+            BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
+              .addReg(DiffReg, RegState::Kill)
+              .addImm(ST.getWavefrontSizeLog2());
+          }
         }
       }
 
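Taken together, the patch gives frame index expansion three fallbacks, tried
in order: the old VALU add when a carry-out SGPR pair can be scavenged, a
SALU sequence through one scratch SGPR when it cannot, and the fully
in-place, reversed SALU sequence when no SGPR is free at all. The sketch
below is a condensed, hypothetical restatement of that ladder, not the
SIRegisterInfo code or API; the register names and printed pseudo-instructions
are illustrative, and the first case's handling of non-inline offsets (an
extra s_mov into a scavenged SGPR) is omitted:

#include <cstdio>

// Illustrative only: print the rough shape of the sequence each case emits.
void expandFrameIndex(bool haveCarryOutPair, bool haveScratchSGPR) {
  if (haveCarryOutPair) {
    // Case 1: an SGPR pair is free for the (dead) carry-out, so the VALU
    // no-carry add helper can be used as before.
    std::puts("v_lshrrev_b32 vTmp, log2(wavesize), sDiff");
    std::puts("v_add_i32     vResult, sDeadCarry, offset, vTmp");
  } else if (haveScratchSGPR) {
    // Case 2: no carry-out pair, but one scratch SGPR is free; keep the
    // computation on the SALU at the cost of one extra copy.
    std::puts("s_lshr_b32 sTmp, sDiff, log2(wavesize)");
    std::puts("s_add_u32  sTmp, sTmp, offset");
    std::puts("copy       vResult, sTmp");
  } else {
    // Case 3: no free SGPRs at all; compute in sDiff itself, then undo the
    // add and the shift so sDiff keeps its original contents.
    std::puts("s_lshr_b32 sDiff, sDiff, log2(wavesize)");
    std::puts("s_add_u32  sDiff, sDiff, offset");
    std::puts("copy       vResult, sDiff");
    std::puts("s_sub_u32  sDiff, sDiff, offset");
    std::puts("s_lshl_b32 sDiff, sDiff, log2(wavesize)");
  }
}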