diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-09-14 17:14:57 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-09-14 17:14:57 +0000 |
| commit | 6efd082c01223609117303256dc3a08b143901dd (patch) | |
| tree | 5a229b5b8f3d6be2fe52f12ba07a7d267dd43268 /llvm/lib | |
| parent | a8daf1747c6b93624dac4feed5a9087d8fde0278 (diff) | |
| download | bcm5719-llvm-6efd082c01223609117303256dc3a08b143901dd.tar.gz bcm5719-llvm-6efd082c01223609117303256dc3a08b143901dd.zip | |
AMDGPU: Make frame register caller preserved
Using SplitCSR for the frame register was very broken. The copies
in the prolog and epilog were often optimized out, and they were
also inserted after the true prolog, where the FP was clobbered.
I have a hacky solution that works while continuing to use
split CSR, but for now this approach is simpler and will get us
to working programs.
llvm-svn: 313274
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 15 |
2 files changed, 16 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index f0900c8bb3e..8454dede0e1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -59,16 +59,7 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs( const MCPhysReg * SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { - // FIXME - static MCPhysReg Regs[2]; - - const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); - assert(!MFI->isEntryFunction()); - - Regs[0] = MFI->getFrameOffsetReg(); - Regs[1] = AMDGPU::NoRegister; - - return Regs; + return nullptr; } const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 48faee9bb99..a5c5ecc694e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2148,6 +2148,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, MachineFrameInfo &MFI = MF.getFrameInfo(); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SDValue CallerSavedFP; + // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) { @@ -2164,6 +2166,13 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue ScratchWaveOffsetReg = DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32); RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg); + + if (!Info->isEntryFunction()) { + // Avoid clobbering this function's FP value. In the current convention + // callee will overwrite this, so do save/restore around the call site. + CallerSavedFP = DAG.getCopyFromReg(Chain, DL, + Info->getFrameOffsetReg(), MVT::i32); + } } // Stack pointer relative accesses are done by changing the offset SGPR. 
This @@ -2344,6 +2353,12 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = Call.getValue(0); InFlag = Call.getValue(1); + if (CallerSavedFP) { + SDValue FPReg = DAG.getRegister(Info->getFrameOffsetReg(), MVT::i32); + Chain = DAG.getCopyToReg(Chain, DL, FPReg, CallerSavedFP, InFlag); + InFlag = Chain.getValue(1); + } + uint64_t CalleePopBytes = 0; Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(NumBytes, DL, MVT::i32), DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32), |

