diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 29 |
1 files changed, 20 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index b611f28fcab..9018e3882d9 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1044,18 +1044,29 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, unsigned CarryOut = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned ScaledReg - = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - // XXX - Should this use a vector shift? - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg) - .addReg(DiffReg, RegState::Kill) - .addImm(Log2_32(ST.getWavefrontSize())); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64), ScaledReg) + .addImm(Log2_32(ST.getWavefrontSize())) + .addReg(DiffReg, RegState::Kill); // TODO: Fold if use instruction is another add of a constant. - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) - .addReg(CarryOut, RegState::Define | RegState::Dead) - .addImm(Offset) - .addReg(ScaledReg, RegState::Kill); + if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) { + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) + .addReg(CarryOut, RegState::Define | RegState::Dead) + .addImm(Offset) + .addReg(ScaledReg, RegState::Kill); + } else { + unsigned ConstOffsetReg + = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg) + .addImm(Offset); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), ResultReg) + .addReg(CarryOut, RegState::Define | RegState::Dead) + .addReg(ConstOffsetReg, RegState::Kill) + .addReg(ScaledReg, RegState::Kill); + } MRI.setRegAllocationHint(CarryOut, 0, AMDGPU::VCC); } |