diff options
author | Nicolai Haehnle <nhaehnle@gmail.com> | 2016-07-11 21:44:40 +0000 |
---|---|---|
committer | Nicolai Haehnle <nhaehnle@gmail.com> | 2016-07-11 21:44:40 +0000 |
commit | f52c3cf27251cb1e254d9829b74c538be7adfd06 (patch) | |
tree | 5003d7abdf76e5c25e6b3ad3e7c3e5eaf0197e3d /llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | |
parent | 1128db8fe1c13800ebc77206efc50d0a219b8750 (diff) | |
download | bcm5719-llvm-f52c3cf27251cb1e254d9829b74c538be7adfd06.tar.gz bcm5719-llvm-f52c3cf27251cb1e254d9829b74c538be7adfd06.zip |
AMDGPU: fix local stack slot allocation bugs
Summary:
The main bug fix here is using the 32-bit encoding of V_ADD_I32 in
materializeFrameBaseRegister and resolveFrameIndex, so that arbitrary
immediates work.
The second part is that we may now require the SegmentWaveByteOffset
even when there are initially no stack objects and VGPR spilling isn't
enabled, for stack slots that are allocated later. This means that some
bits become effectively dead and can be cleaned up.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96602
Tested-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Reviewers: arsenm, tstellarAMD
Subscribers: arsenm, llvm-commits, kzhuravl
Differential Revision: http://reviews.llvm.org/D21551
llvm-svn: 275108
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 748209bd065..a57f4a0fe91 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -285,10 +285,13 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB, MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); + BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) + .addImm(Offset); BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead) - .addImm(Offset) + .addReg(OffsetReg, RegState::Kill) .addFrameIndex(FrameIdx); } @@ -335,13 +338,16 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, assert(Offset != 0 && "Non-zero offset expected"); unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); // In the case the instruction already had an immediate offset, here only // the requested new offset is added because we are leaving the original // immediate in place. + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) + .addImm(Offset); BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), NewReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead) - .addImm(Offset) + .addReg(OffsetReg, RegState::Kill) .addReg(BaseReg); FIOp->ChangeToRegister(NewReg, false); |