[AMDGPU] Fix VGPR spills where offset doesn't fit in 12 bits

Scale the offset of a VGPR spill by the wavefront size when the offset does not fit in the 12-bit immediate offset field and therefore has to be added to the soffset SGPR instead. The scaling accounts for the hardware swizzling of scratch memory.

Differential Revision: https://reviews.llvm.org/D49448
llvm-svn: 338060
author: Scott Linder <scott@scottlinder.com> 2018-07-26 19:47:51 +0000
committer: Scott Linder <scott@scottlinder.com> 2018-07-26 19:47:51 +0000
commit: eb1f75d561762123224c17b763f68def84a05715 (patch)
tree: c179aebdeab30fc69f5db24ff1c9bcb711d9fe32 /llvm/lib/Target
parent: 6d6eab66e0f76c8bb908492dcc863308b66f7771 (diff)
download: bcm5719-llvm-eb1f75d561762123224c17b763f68def84a05715.tar.gz
bcm5719-llvm-eb1f75d561762123224c17b763f68def84a05715.zip
1 files changed, 16 insertions, 11 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 5bfe071c00e..624607f6ea5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -532,22 +532,29 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
   const DebugLoc &DL = MI->getDebugLoc();
   bool IsStore = Desc.mayStore();
 
-  bool RanOutOfSGPRs = false;
   bool Scavenged = false;
   unsigned SOffset = ScratchOffsetReg;
 
+  const unsigned EltSize = 4;
   const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
-  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
-  unsigned Size = NumSubRegs * 4;
+  unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
+  unsigned Size = NumSubRegs * EltSize;
   int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
-  const int64_t OriginalImmOffset = Offset;
+  int64_t ScratchOffsetRegDelta = 0;
 
   unsigned Align = MFI.getObjectAlignment(Index);
   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
 
-  if (!isUInt<12>(Offset + Size)) {
+  assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
+
+  if (!isUInt<12>(Offset + Size - EltSize)) {
     SOffset = AMDGPU::NoRegister;
 
+    // We currently only support spilling VGPRs to EltSize boundaries, meaning
+    // we can simplify the adjustment of Offset here to just scale with
+    // WavefrontSize.
+    Offset *= ST.getWavefrontSize();
+
     // We don't have access to the register scavenger if this function is called
     // during  PEI::scavengeFrameVirtualRegs().
     if (RS)
@@ -561,8 +568,8 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
       // add the offset directly to the ScratchOffset register, and then
       // subtract the offset after the spill to return ScratchOffset to it's
       // original value.
-      RanOutOfSGPRs = true;
       SOffset = ScratchOffsetReg;
+      ScratchOffsetRegDelta = Offset;
     } else {
       Scavenged = true;
     }
@@ -574,8 +581,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
     Offset = 0;
   }
 
-  const unsigned EltSize = 4;
-
   for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
     unsigned SubReg = NumSubRegs == 1 ?
       ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
@@ -607,11 +612,11 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
   }
 
-  if (RanOutOfSGPRs) {
+  if (ScratchOffsetRegDelta != 0) {
     // Subtract the offset we added to the ScratchOffset register.
     BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
-      .addReg(ScratchOffsetReg)
-      .addImm(OriginalImmOffset);
+        .addReg(ScratchOffsetReg)
+        .addImm(ScratchOffsetRegDelta);
   }
 }
author	Scott Linder <scott@scottlinder.com>	2018-07-26 19:47:51 +0000
committer	Scott Linder <scott@scottlinder.com>	2018-07-26 19:47:51 +0000
commit	eb1f75d561762123224c17b763f68def84a05715 (patch)
tree	c179aebdeab30fc69f5db24ff1c9bcb711d9fe32 /llvm/lib/Target
parent	6d6eab66e0f76c8bb908492dcc863308b66f7771 (diff)
download	bcm5719-llvm-eb1f75d561762123224c17b763f68def84a05715.tar.gz bcm5719-llvm-eb1f75d561762123224c17b763f68def84a05715.zip