summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-09-14 17:14:57 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-09-14 17:14:57 +0000
commit6efd082c01223609117303256dc3a08b143901dd (patch)
tree5a229b5b8f3d6be2fe52f12ba07a7d267dd43268 /llvm/lib
parenta8daf1747c6b93624dac4feed5a9087d8fde0278 (diff)
downloadbcm5719-llvm-6efd082c01223609117303256dc3a08b143901dd.tar.gz
bcm5719-llvm-6efd082c01223609117303256dc3a08b143901dd.zip
AMDGPU: Make frame register caller preserved
Using SplitCSR for the frame register was badly broken: the copies in the prolog and epilog were often optimized out, and they were also inserted after the true prolog, where the FP had already been clobbered. I have a hacky solution that works while continuing to use split CSR, but for now this approach is simpler and gets us to working programs. llvm-svn: 313274
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 11
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15
2 files changed, 16 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index f0900c8bb3e..8454dede0e1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -59,16 +59,7 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
- // FIXME
- static MCPhysReg Regs[2];
-
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- assert(!MFI->isEntryFunction());
-
- Regs[0] = MFI->getFrameOffsetReg();
- Regs[1] = AMDGPU::NoRegister;
-
- return Regs;
+ return nullptr;
}
const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 48faee9bb99..a5c5ecc694e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2148,6 +2148,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
MachineFrameInfo &MFI = MF.getFrameInfo();
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SDValue CallerSavedFP;
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall) {
@@ -2164,6 +2166,13 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue ScratchWaveOffsetReg
= DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32);
RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
+
+ if (!Info->isEntryFunction()) {
+ // Avoid clobbering this function's FP value. In the current convention
+ // callee will overwrite this, so do save/restore around the call site.
+ CallerSavedFP = DAG.getCopyFromReg(Chain, DL,
+ Info->getFrameOffsetReg(), MVT::i32);
+ }
}
// Stack pointer relative accesses are done by changing the offset SGPR. This
@@ -2344,6 +2353,12 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = Call.getValue(0);
InFlag = Call.getValue(1);
+ if (CallerSavedFP) {
+ SDValue FPReg = DAG.getRegister(Info->getFrameOffsetReg(), MVT::i32);
+ Chain = DAG.getCopyToReg(Chain, DL, FPReg, CallerSavedFP, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
uint64_t CalleePopBytes = 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(NumBytes, DL, MVT::i32),
DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
OpenPOWER on IntegriCloud