diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-09 01:13:56 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-09 01:13:56 +0000 |
commit | 1322b6f8bbf39db37ffc717c9fd208f2726ede38 (patch) | |
tree | b564bc4adb22fe878c433a04cfac89b1e2b7b284 /llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | |
parent | 95c7897555f4f99f5ce71f84ab11b63c256293f2 (diff) | |
download | bcm5719-llvm-1322b6f8bbf39db37ffc717c9fd208f2726ede38.tar.gz bcm5719-llvm-1322b6f8bbf39db37ffc717c9fd208f2726ede38.zip |
AMDGPU: Improve offset folding for register indexing
llvm-svn: 274954
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 62 |
1 files changed, 40 insertions, 22 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 930fcb9c837..1ba14cd2d1c 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -435,7 +435,7 @@ void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) .addReg(AMDGPU::VCC); - if (Offset) { + if (Offset != 0) { BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) .addReg(AMDGPU::M0) .addImm(Offset); @@ -463,7 +463,7 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx); if (AMDGPU::SReg_32RegClass.contains(Idx->getReg())) { - if (Offset) { + if (Offset != 0) { BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0) .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef())) .addImm(Offset); @@ -520,16 +520,17 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs return true; } -/// \param @VecReg The register which holds element zero of the vector -/// being addressed into. -/// \param[out] @Reg The base register to use in the indirect addressing instruction. -/// \param[in,out] @Offset As an input, this is the constant offset part of the -// indirect Index. e.g. v0 = v[VecReg + Offset] -// As an output, this is a constant value that needs -// to be added to the value stored in M0. +/// \param @VecReg The register which holds element zero of the vector being +/// addressed into. +// +/// \param[in] @Idx The index operand from the movrel instruction. This must be +// a register, but may be NoRegister. +/// +/// \param[in] @Offset As an input, this is the constant offset part of the +// indirect Index. e.g. v0 = v[VecReg + Offset] As an output, this is a constant +// value that needs to be added to the value stored in M0. std::pair<unsigned, int> -SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, - int Offset) const { +SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, int Offset) const { unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0); if (!SubReg) SubReg = VecReg; @@ -560,42 +561,59 @@ SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, // Return true if a new block was inserted. bool SILowerControlFlow::indirectSrc(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); unsigned Dst = MI.getOperand(0).getReg(); const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src); - int Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); + int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); unsigned Reg; - std::tie(Reg, Off) = computeIndirectRegAndOffset(SrcVec->getReg(), Off); + std::tie(Reg, Offset) = computeIndirectRegAndOffset(SrcVec->getReg(), Offset); + + const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx); + if (Idx->getReg() == AMDGPU::NoRegister) { + // Only had a constant offset, copy the register directly. + BuildMI(MBB, MI.getIterator(), DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst) + .addReg(Reg, getUndefRegState(SrcVec->isUndef())); + MI.eraseFromParent(); + return false; + } MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) .addReg(Reg, getUndefRegState(SrcVec->isUndef())) .addReg(SrcVec->getReg(), RegState::Implicit); - return loadM0(MI, MovRel, Off); + return loadM0(MI, MovRel, Offset); } // Return true if a new block was inserted. bool SILowerControlFlow::indirectDst(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); - DebugLoc DL = MI.getDebugLoc(); + const DebugLoc &DL = MI.getDebugLoc(); unsigned Dst = MI.getOperand(0).getReg(); - int Off = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); - MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val); + int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm(); unsigned Reg; - std::tie(Reg, Off) = computeIndirectRegAndOffset(Dst, Off); + const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val); + std::tie(Reg, Offset) = computeIndirectRegAndOffset(Dst, Offset); + + MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx); + if (Idx->getReg() == AMDGPU::NoRegister) { + // Only had a constant offset, copy the register directly. + BuildMI(MBB, MI.getIterator(), DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg) + .addOperand(*Val); + MI.eraseFromParent(); + return false; + } MachineInstr *MovRel = - BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) - .addReg(Reg, RegState::Define) + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32), Reg) .addReg(Val->getReg(), getUndefRegState(Val->isUndef())) .addReg(Dst, RegState::Implicit); - return loadM0(MI, MovRel, Off); + return loadM0(MI, MovRel, Offset); } bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { |