summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/R600/SIInstrInfo.cpp 163
-rw-r--r-- llvm/lib/Target/R600/SIInstrInfo.h 4
-rw-r--r-- llvm/lib/Target/R600/SIInstructions.td 23
-rw-r--r-- llvm/lib/Target/R600/SIMachineFunctionInfo.cpp 57
-rw-r--r-- llvm/lib/Target/R600/SIMachineFunctionInfo.h 7
-rw-r--r-- llvm/lib/Target/R600/SIRegisterInfo.cpp 7
-rw-r--r-- llvm/lib/Target/R600/SIRegisterInfo.h 6
7 files changed, 231 insertions, 36 deletions
diff --git a/llvm/lib/Target/R600/SIInstrInfo.cpp b/llvm/lib/Target/R600/SIInstrInfo.cpp
index 5d08b91ea7b..454b7c2d55f 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.cpp
+++ b/llvm/lib/Target/R600/SIInstrInfo.cpp
@@ -187,27 +187,45 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned KillFlag = isKill ? RegState::Kill : 0;
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
- unsigned Lane = MFI->SpillTracker.getNextLane(MRI);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
- MFI->SpillTracker.LaneVGPR)
+ unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
.addReg(SrcReg, KillFlag)
.addImm(Lane);
+ MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
+ } else if (RI.isSGPRClass(RC)) {
+ // We are only allowed to create one new instruction when spilling
+ // registers, so we need to use a pseudo instruction for vector
+ // registers.
+ //
+ // Reserve a spot in the spill tracker for each sub-register of
+ // the vector register.
+ unsigned NumSubRegs = RC->getSize() / 4;
+ unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
+ NumSubRegs);
MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
- Lane);
- } else {
- for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
- unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
- .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
- storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
- &AMDGPU::SReg_32RegClass, TRI);
+ FirstLane);
+
+ unsigned Opcode;
+ switch (RC->getSize() * 8) {
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
+ default: llvm_unreachable("Cannot spill register class");
}
+
+ BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
+ .addReg(SrcReg)
+ .addImm(FrameIndex);
+ } else {
+ llvm_unreachable("VGPR spilling not supported");
}
}
@@ -216,30 +234,125 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
- SIMachineFunctionInfo::SpilledReg Spill =
+ SIMachineFunctionInfo::SpilledReg Spill =
MFI->SpillTracker.getSpilledReg(FrameIndex);
assert(Spill.VGPR);
BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
+ insertNOPs(MI, 3);
+ } else if (RI.isSGPRClass(RC)){
+ unsigned Opcode;
+ switch(RC->getSize() * 8) {
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
+ default: llvm_unreachable("Cannot spill register class");
+ }
+
+ SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+ BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+ .addReg(Spill.VGPR)
+ .addImm(FrameIndex);
+ insertNOPs(MI, 3);
} else {
- for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
- unsigned Flags = RegState::Define;
- if (i == 0) {
- Flags |= RegState::Undef;
- }
- unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
- &AMDGPU::SReg_32RegClass, TRI);
- BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
- .addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
- .addReg(SubReg);
+ llvm_unreachable("VGPR spilling not supported");
+ }
+}
+
+static unsigned getNumSubRegsForSpillOp(unsigned Op) {
+
+ switch (Op) {
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ return 16;
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ return 8;
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ return 4;
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ return 2;
+ default: llvm_unreachable("Invalid spill opcode");
+ }
+}
+
+void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
+ int Count) const {
+ while (Count > 0) {
+ int Arg;
+ if (Count >= 8)
+ Arg = 7;
+ else
+ Arg = Count - 1;
+ Count -= 8;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
+ .addImm(Arg);
+ }
+}
+
+bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ SIMachineFunctionInfo *MFI =
+ MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ switch (MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+
+ // SGPR register spill
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ unsigned FrameIndex = MI->getOperand(2).getImm();
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ SIMachineFunctionInfo::SpilledReg Spill;
+ unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
+ MI->getOperand(0).getReg())
+ .addReg(SubReg)
+ .addImm(Spill.Lane + i);
}
+ MI->eraseFromParent();
+ break;
}
+
+ // SGPR register restore
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ SIMachineFunctionInfo::SpilledReg Spill;
+ unsigned FrameIndex = MI->getOperand(2).getImm();
+ unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(Spill.Lane + i);
+ }
+ MI->eraseFromParent();
+ break;
+ }
+ }
+ return true;
}
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
diff --git a/llvm/lib/Target/R600/SIInstrInfo.h b/llvm/lib/Target/R600/SIInstrInfo.h
index 63f1d7fdee8..d7992742216 100644
--- a/llvm/lib/Target/R600/SIInstrInfo.h
+++ b/llvm/lib/Target/R600/SIInstrInfo.h
@@ -73,6 +73,8 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
unsigned commuteOpcode(unsigned Opcode) const;
MachineInstr *commuteInstruction(MachineInstr *MI,
@@ -165,6 +167,8 @@ public:
void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
unsigned SavReg, unsigned IndexReg) const;
+
+ void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
};
namespace AMDGPU {
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 27e7abe1a38..b93de36ddf8 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -369,7 +369,7 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
let Predicates = [isSI] in {
-//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
+def S_NOP : SOPP <0x00000000, (ins i16imm:$SIMM16), "S_NOP $SIMM16", []>;
let isTerminator = 1 in {
@@ -1574,6 +1574,27 @@ def V_SUB_F64 : InstSI <
} // end usesCustomInserter
+multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
+
+ def _SAVE : InstSI <
+ (outs VReg_32:$dst),
+ (ins sgpr_class:$src, i32imm:$frame_idx),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs sgpr_class:$dst),
+ (ins VReg_32:$src, i32imm:$frame_idx),
+ "", []
+ >;
+
+}
+
+defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
+defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
+defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+
} // end IsCodeGenOnly, isPseudo
def : Pat<
diff --git a/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp b/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
index ea04346e509..af609958129 100644
--- a/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,8 +10,11 @@
#include "SIMachineFunctionInfo.h"
+#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#define MAX_LANES 64
@@ -26,21 +29,57 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PSInputAddr(0),
SpillTracker() { }
-static unsigned createLaneVGPR(MachineRegisterInfo &MRI) {
- return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
+ unsigned VGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+
+ // We need to add this register as live out for the function, in order to
+ // have the live range calculated correctly.
+ //
+ // When register spilling begins, we have already calculated the
+ // live intervals for all the registers. Since we are spilling SGPRs to
+ // VGPRs, we need to update the Lane VGPR's live interval every time we
+ // spill or restore a register.
+ //
+ // Unfortunately, there is no good way to update the live interval as
+ // the TargetInstrInfo callbacks for spilling and restoring don't give
+ // us access to the live interval information.
+ //
+ // We are lucky, though, because the InlineSpiller calls
+ // LiveRangeEdit::calculateRegClassAndHint() which iterates through
+ // all the new registers that have been created when restoring a register
+ // and calls LiveIntervals::getInterval(), which creates and computes
+ // the live interval for the newly created register. However, once this
+ // live interval is created, it doesn't change and since we usually reuse
+ // the Lane VGPR multiple times, this means any uses after the first aren't
+ // added to the live interval.
+ //
+ // To work around this, we add Lane VGPRs to the function's live out list
+ // (an implicit use on S_ENDPGM), so the live range covers all of its uses.
+
+ for (MachineBasicBlock &MBB : *MF) {
+ if (!MBB.empty() && MBB.back().getOpcode() == AMDGPU::S_ENDPGM) {
+ MBB.back().addOperand(*MF, MachineOperand::CreateReg(VGPR, false, true));
+ return VGPR;
+ }
+ }
+ MF->getFunction()->getContext().emitError(
+ "Could not find S_ENDPGM instruction.");
+ return VGPR;
}
-unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) {
+unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes(
+ MachineRegisterInfo &MRI, MachineFunction *MF, unsigned NumRegs) {
+ unsigned StartLane = CurrentLane; // CurrentLane is the next free lane.
+ CurrentLane += NumRegs;
if (!LaneVGPR) {
- LaneVGPR = createLaneVGPR(MRI);
+ LaneVGPR = createLaneVGPR(MRI, MF);
} else {
- CurrentLane++;
- if (CurrentLane == MAX_LANES) {
- CurrentLane = 0;
- LaneVGPR = createLaneVGPR(MRI);
+ if (CurrentLane > MAX_LANES) { // Request does not fit: start a new VGPR.
+ StartLane = 0; CurrentLane = NumRegs; // Lanes [0, NumRegs) are now taken.
+ LaneVGPR = createLaneVGPR(MRI, MF);
}
}
- return CurrentLane;
+ return StartLane;
}
void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
diff --git a/llvm/lib/Target/R600/SIMachineFunctionInfo.h b/llvm/lib/Target/R600/SIMachineFunctionInfo.h
index ef38270d51a..96e619bde8d 100644
--- a/llvm/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/R600/SIMachineFunctionInfo.h
@@ -43,7 +43,12 @@ public:
public:
unsigned LaneVGPR;
RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
- unsigned getNextLane(MachineRegisterInfo &MRI);
+ /// \p NumRegs The number of consecutive registers that need to be spilled.
+ /// This function will ensure that all registers are stored in
+ /// the same VGPR.
+ /// \returns The lane to be used for storing the first register.
+ unsigned reserveLanes(MachineRegisterInfo &MRI, MachineFunction *MF,
+ unsigned NumRegs = 1);
void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
const SpilledReg& getSpilledReg(unsigned FrameIndex);
bool programSpillsRegisters() { return !SpilledRegisters.empty(); }
diff --git a/llvm/lib/Target/R600/SIRegisterInfo.cpp b/llvm/lib/Target/R600/SIRegisterInfo.cpp
index 5897fbca94c..8dc9a05799c 100644
--- a/llvm/lib/Target/R600/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/R600/SIRegisterInfo.cpp
@@ -129,3 +129,10 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return &AMDGPU::VGPR_32RegClass;
}
}
+
+unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
+ const TargetRegisterClass *SubRC,
+ unsigned Channel) const {
+ unsigned Index = getHWRegIndex(Reg);
+ return SubRC->getRegister(Index + Channel);
+}
diff --git a/llvm/lib/Target/R600/SIRegisterInfo.h b/llvm/lib/Target/R600/SIRegisterInfo.h
index 54717c184bc..36b4fcd32a8 100644
--- a/llvm/lib/Target/R600/SIRegisterInfo.h
+++ b/llvm/lib/Target/R600/SIRegisterInfo.h
@@ -63,6 +63,12 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// be returned.
const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
unsigned SubIdx) const;
+
+ /// \p Channel This is the register channel (e.g. a value from 0-15), not the
+ /// SubReg index.
+ /// \returns The sub-register of Reg that is in Channel.
+ unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
+ unsigned Channel) const;
};
} // End namespace llvm
OpenPOWER on IntegriCloud