summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
diff options
context:
space:
mode:
author: Nicolai Haehnle <nhaehnle@gmail.com> 2016-07-11 21:44:40 +0000
committer: Nicolai Haehnle <nhaehnle@gmail.com> 2016-07-11 21:44:40 +0000
commit: f52c3cf27251cb1e254d9829b74c538be7adfd06 (patch)
tree: 5003d7abdf76e5c25e6b3ad3e7c3e5eaf0197e3d /llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
parent: 1128db8fe1c13800ebc77206efc50d0a219b8750 (diff)
download: bcm5719-llvm-f52c3cf27251cb1e254d9829b74c538be7adfd06.tar.gz
          bcm5719-llvm-f52c3cf27251cb1e254d9829b74c538be7adfd06.zip
AMDGPU: fix local stack slot allocation bugs
Summary:
The main bug fix here is using the 32-bit encoding of V_ADD_I32 in materializeFrameBaseRegister and resolveFrameIndex, so that arbitrary immediates work.

The second part is that we may now require the SegmentWaveByteOffset even when there are initially no stack objects and VGPR spilling isn't enabled, for stack slots that are allocated later. This means that some bits become effectively dead and can be cleaned up.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96602
Tested-by: Kai Wasserbäch <kai@dev.carbon-project.org>
Reviewers: arsenm, tstellarAMD
Subscribers: arsenm, llvm-commits, kzhuravl
Differential Revision: http://reviews.llvm.org/D21551
llvm-svn: 275108
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp 10
1 file changed, 8 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 748209bd065..a57f4a0fe91 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -285,10 +285,13 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addImm(Offset);
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead)
- .addImm(Offset)
+ .addReg(OffsetReg, RegState::Kill)
.addFrameIndex(FrameIdx);
}
@@ -335,13 +338,16 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
assert(Offset != 0 && "Non-zero offset expected");
unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
// In the case the instruction already had an immediate offset, here only
// the requested new offset is added because we are leaving the original
// immediate in place.
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addImm(Offset);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), NewReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead)
- .addImm(Offset)
+ .addReg(OffsetReg, RegState::Kill)
.addReg(BaseReg);
FIOp->ChangeToRegister(NewReg, false);
OpenPOWER on IntegriCloud