Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h        |   3
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td        |  50
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp      | 119
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h        |   5
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td          |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp       |   8
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp   |  17
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp      |  45
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h        |   3
-rw-r--r--  llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp        |  12
-rw-r--r--  llvm/lib/Target/AMDGPU/R600RegisterInfo.h          |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp         |  20
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.h           |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp          | 165
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.h            |  11
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp   |  19
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h     |   5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp          |  43
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.h            |  14
-rw-r--r--  llvm/lib/Target/AMDGPU/SOPInstructions.td          |  18
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  13
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h      |   1
22 files changed, 461 insertions(+), 116 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 09bdf8ffcde..251cb7a2c44 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -38,7 +38,8 @@ class AMDGPUCallLowering: public CallLowering {
unsigned VReg) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<unsigned> VRegs) const override;
- CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
+ static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
+ static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
};
} // End of namespace llvm;
#endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index d308f718aae..4bef7a89bfe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -13,6 +13,8 @@
// Inversion of CCIfInReg
class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
+class CCIfExtend<CCAction A>
+ : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
// Calling convention for SI
def CC_SI : CallingConv<[
@@ -52,7 +54,7 @@ def CC_SI : CallingConv<[
]>>>
]>;
-def RetCC_SI : CallingConv<[
+def RetCC_SI_Shader : CallingConv<[
CCIfType<[i32] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
@@ -99,6 +101,52 @@ def CC_AMDGPU_Kernel : CallingConv<[
CCCustom<"allocateKernArg">
]>;
+def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<
+ (sequence "VGPR%u", 24, 255)
+>;
+
+def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
+ (sequence "VGPR%u", 32, 255)
+>;
+
+def CSR_AMDGPU_SGPRs_32_103 : CalleeSavedRegs<
+ (sequence "SGPR%u", 32, 103)
+>;
+
+def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
+ (add CSR_AMDGPU_VGPRs_32_255, CSR_AMDGPU_SGPRs_32_103)
+>;
+
+// Calling convention for leaf functions
+def CC_AMDGPU_Func : CallingConv<[
+ CCIfByVal<CCPassByVal<4, 4>>,
+ CCIfType<[i1], CCPromoteToType<i32>>,
+ CCIfType<[i1, i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1], CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
+ CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>,
+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
+ CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
+ CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
+ CCIfType<[v8i32, v8f32], CCAssignToStack<32, 4>>,
+ CCIfType<[v16i32, v16f32], CCAssignToStack<64, 4>>
+]>;
+
+// Calling convention for leaf functions
+def RetCC_AMDGPU_Func : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i32>>,
+ CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16], CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
+ CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>
+]>;
+
def CC_AMDGPU : CallingConv<[
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >="
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index f80652b8737..b49263d61a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -76,6 +76,45 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+// Allocate up to VGPR31.
+//
+// TODO: Since there are no VGPR alignment requirements, would it be better to
+// split into individual scalar registers?
+static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ switch (LocVT.SimpleTy) {
+ case MVT::i64:
+ case MVT::f64:
+ case MVT::v2i32:
+ case MVT::v2f32: {
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::VReg_64RegClass, 31);
+ }
+ case MVT::v4i32:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v2f64: {
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::VReg_128RegClass, 29);
+ }
+ case MVT::v8i32:
+ case MVT::v8f32: {
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::VReg_256RegClass, 25);
+
+ }
+ case MVT::v16i32:
+ case MVT::v16f32: {
+ return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
+ &AMDGPU::VReg_512RegClass, 17);
+
+ }
+ default:
+ return false;
+ }
+}
+
#include "AMDGPUGenCallingConv.inc"
// Find a larger type to do a load / store of a vector with.
@@ -773,8 +812,43 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
//===---------------------------------------------------------------------===//
CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
- bool IsVarArg) const {
- return CC_AMDGPU;
+ bool IsVarArg) {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return CC_AMDGPU_Kernel;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ return CC_AMDGPU;
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return CC_AMDGPU_Func;
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ }
+}
+
+CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
+ bool IsVarArg) {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return CC_AMDGPU_Kernel;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ return RetCC_SI_Shader;
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return RetCC_AMDGPU_Func;
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ }
}
/// The SelectionDAGBuilder will automatically promote function arguments
@@ -874,18 +948,15 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
}
}
-void AMDGPUTargetLowering::AnalyzeReturn(CCState &State,
- const SmallVectorImpl<ISD::OutputArg> &Outs) const {
-
- State.AnalyzeReturn(Outs, RetCC_SI);
-}
-
-SDValue
-AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SDLoc &DL, SelectionDAG &DAG) const {
+SDValue AMDGPUTargetLowering::LowerReturn(
+ SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &DL, SelectionDAG &DAG) const {
+ // FIXME: Fails for r600 tests
+ //assert(!isVarArg && Outs.empty() && OutVals.empty() &&
+ // "wave terminate should not have return values");
return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
}
@@ -896,20 +967,12 @@ AMDGPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) {
- switch (CC) {
- case CallingConv::C:
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::SPIR_KERNEL:
- return CC_AMDGPU_Kernel;
- case CallingConv::AMDGPU_VS:
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_GS:
- case CallingConv::AMDGPU_PS:
- case CallingConv::AMDGPU_CS:
- return CC_AMDGPU;
- default:
- report_fatal_error("Unsupported calling convention.");
- }
+ return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg);
+}
+
+CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
+ bool IsVarArg) {
+ return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg);
}
SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 4c588a7bafd..fb2f15022d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -115,9 +115,6 @@ protected:
SmallVectorImpl<SDValue> &Results) const;
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
- void AnalyzeReturn(CCState &State,
- const SmallVectorImpl<ISD::OutputArg> &Outs) const;
-
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
@@ -164,6 +161,8 @@ public:
bool isCheapToSpeculateCtlz() const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
+ static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 353cc574279..e286558ce60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -380,6 +380,6 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index da247fea7de..f1ef6281c90 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -126,9 +126,15 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
}
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ unsigned Opcode = MI->getOpcode();
- int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
+ // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
+ // need to select it to the subtarget specific version, and there's no way to
+ // do that with a single pseudo source operation.
+ if (Opcode == AMDGPU::S_SETPC_B64_return)
+ Opcode = AMDGPU::S_SETPC_B64;
+ int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(Opcode);
if (MCOpcode == -1) {
LLVMContext &C = MI->getParent()->getParent()->getFunction()->getContext();
C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have "
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index fe7283ccf7d..9fb7f5f8892 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -12,21 +12,6 @@
using namespace llvm;
-static bool isEntryFunctionCC(CallingConv::ID CC) {
- switch (CC) {
- case CallingConv::AMDGPU_KERNEL:
- case CallingConv::SPIR_KERNEL:
- case CallingConv::AMDGPU_VS:
- case CallingConv::AMDGPU_HS:
- case CallingConv::AMDGPU_GS:
- case CallingConv::AMDGPU_PS:
- case CallingConv::AMDGPU_CS:
- return true;
- default:
- return false;
- }
-}
-
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
LocalMemoryObjects(),
@@ -34,7 +19,7 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
- IsEntryFunction(isEntryFunctionCC(MF.getFunction()->getCallingConv())),
+ IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction()->getCallingConv())),
NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
// except reserved size is not correctly aligned.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index 941f2d8a468..b2867fcc49f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -14,6 +14,7 @@
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "SIRegisterInfo.h"
using namespace llvm;
@@ -24,18 +25,6 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
// they are not supported at this time.
//===----------------------------------------------------------------------===//
-// Dummy to not crash RegisterClassInfo.
-static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
-
-const MCPhysReg *AMDGPURegisterInfo::getCalleeSavedRegs(
- const MachineFunction *) const {
- return &CalleeSavedReg;
-}
-
-unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return AMDGPU::NoRegister;
-}
-
unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
static const unsigned SubRegs[] = {
AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
@@ -50,3 +39,35 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"
+
+
+// Forced to be here by one .inc
+const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
+ const MachineFunction *MF) const {
+ CallingConv::ID CC = MF->getFunction()->getCallingConv();
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return CSR_AMDGPU_HighRegs_SaveList;
+ default: {
+ // Dummy to not crash RegisterClassInfo.
+ static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
+ return &NoCalleeSavedReg;
+ }
+ }
+}
+
+const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID CC) const {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::Fast:
+ return CSR_AMDGPU_HighRegs_RegMask;
+ default:
+ return nullptr;
+ }
+}
+
+unsigned SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return AMDGPU::NoRegister;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 22b1663821d..d8604d2590f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -30,9 +30,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
unsigned getSubRegFromChannel(unsigned Channel) const;
-
- const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override;
- unsigned getFrameRegister(const MachineFunction &MF) const override;
};
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index dfdc602b80c..7501facb0cb 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -56,6 +56,18 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+// Dummy to not crash RegisterClassInfo.
+static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
+
+const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
+ const MachineFunction *) const {
+ return &CalleeSavedReg;
+}
+
+unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return AMDGPU::NoRegister;
+}
+
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.h b/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
index 9dfb3106c6c..f0d9644b02f 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.h
@@ -27,6 +27,8 @@ struct R600RegisterInfo final : public AMDGPURegisterInfo {
R600RegisterInfo();
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 1279f845de0..97bb0f0c065 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -189,8 +189,6 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
// ----
// 13 (+1)
unsigned ReservedRegCount = 13;
- if (SPReg != AMDGPU::NoRegister)
- ++ReservedRegCount;
if (AllSGPRs.size() < ReservedRegCount)
return std::make_pair(ScratchWaveOffsetReg, SPReg);
@@ -208,13 +206,6 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
MFI->setScratchWaveOffsetReg(Reg);
ScratchWaveOffsetReg = Reg;
- } else {
- if (SPReg == AMDGPU::NoRegister)
- break;
-
- MRI.replaceRegWith(SPReg, Reg);
- MFI->setStackPtrOffsetReg(Reg);
- SPReg = Reg;
break;
}
}
@@ -223,8 +214,8 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
return std::make_pair(ScratchWaveOffsetReg, SPReg);
}
-void SIFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
// specified.
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
@@ -424,6 +415,13 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
}
}
+void SIFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ if (MFI->isEntryFunction())
+ emitEntryFunctionPrologue(MF, MBB);
+}
+
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 7ccd02b3c86..e17adbe2736 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -26,6 +26,8 @@ public:
AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
~SIFrameLowering() override = default;
+ void emitEntryFunctionPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
void emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4b27bba8188..01c1f78e7ca 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -914,6 +914,55 @@ SDValue SITargetLowering::lowerKernargMemParameter(
return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}
+SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
+ const SDLoc &SL, SDValue Chain,
+ const ISD::InputArg &Arg) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ if (Arg.Flags.isByVal()) {
+ unsigned Size = Arg.Flags.getByValSize();
+ int FrameIdx = MFI.CreateFixedObject(Size, VA.getLocMemOffset(), false);
+ return DAG.getFrameIndex(FrameIdx, MVT::i32);
+ }
+
+ unsigned ArgOffset = VA.getLocMemOffset();
+ unsigned ArgSize = VA.getValVT().getStoreSize();
+
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, true);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+ SDValue ArgValue;
+
+ // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ MVT MemVT = VA.getValVT();
+
+ switch (VA.getLocInfo()) {
+ default:
+ break;
+ case CCValAssign::BCvt:
+ MemVT = VA.getLocVT();
+ break;
+ case CCValAssign::SExt:
+ ExtType = ISD::SEXTLOAD;
+ break;
+ case CCValAssign::ZExt:
+ ExtType = ISD::ZEXTLOAD;
+ break;
+ case CCValAssign::AExt:
+ ExtType = ISD::EXTLOAD;
+ break;
+ }
+
+ ArgValue = DAG.getExtLoad(
+ ExtType, SL, VA.getLocVT(), Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
+ MemVT);
+ return ArgValue;
+}
+
static void processShaderInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
CallingConv::ID CallConv,
ArrayRef<ISD::InputArg> Ins,
@@ -1094,10 +1143,12 @@ static void allocateSystemSGPRs(CCState &CCInfo,
static void reservePrivateMemoryRegs(const TargetMachine &TM,
MachineFunction &MF,
const SIRegisterInfo &TRI,
- SIMachineFunctionInfo &Info) {
+ SIMachineFunctionInfo &Info,
+ bool NeedSP) {
// Now that we've figured out where the scratch register inputs are, see if
// should reserve the arguments and use them directly.
- bool HasStackObjects = MF.getFrameInfo().hasStackObjects();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool HasStackObjects = MFI.hasStackObjects();
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
@@ -1155,6 +1206,15 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
Info.setScratchWaveOffsetReg(ReservedOffsetReg);
}
}
+
+  if (NeedSP) {
+ unsigned ReservedStackPtrOffsetReg = TRI.reservedStackPtrOffsetReg(MF);
+ Info.setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
+
+ assert(Info.getStackPtrOffsetReg() != Info.getFrameOffsetReg());
+ assert(!TRI.isSubRegister(Info.getScratchRSrcReg(),
+ Info.getStackPtrOffsetReg()));
+ }
}
SDValue SITargetLowering::LowerFormalArguments(
@@ -1223,8 +1283,10 @@ SDValue SITargetLowering::LowerFormalArguments(
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
!Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
!Info->hasWorkItemIDZ());
+ } else if (IsKernel) {
+ assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
- assert(!IsKernel || (Info->hasWorkGroupIDX() && Info->hasWorkItemIDX()));
+ Splits.append(Ins.begin(), Ins.end());
}
if (IsEntryFunc) {
@@ -1278,11 +1340,14 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Arg);
continue;
+ } else if (!IsEntryFunc && VA.isMemLoc()) {
+ SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg);
+ InVals.push_back(Val);
+ if (!Arg.Flags.isByVal())
+ Chains.push_back(Val.getValue(1));
+ continue;
}
- if (VA.isMemLoc())
- report_fatal_error("memloc not supported with calling convention");
-
assert(VA.isRegLoc() && "Parameter must be in a register!");
unsigned Reg = VA.getLocReg();
@@ -1291,7 +1356,7 @@ SDValue SITargetLowering::LowerFormalArguments(
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- if (Arg.VT.isVector()) {
+ if (IsShader && Arg.VT.isVector()) {
// Build a vector from the registers
Type *ParamType = FType->getParamType(Arg.getOrigArgIndex());
unsigned NumElements = ParamType->getVectorNumElements();
@@ -1317,16 +1382,49 @@ SDValue SITargetLowering::LowerFormalArguments(
InVals.push_back(Val);
}
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+
+ // TODO: Could maybe omit SP if only tail calls?
+ bool NeedSP = FrameInfo.hasCalls() || FrameInfo.hasVarSizedObjects();
+
// Start adding system SGPRs.
- if (IsEntryFunc)
+ if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
-
- reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
+ reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info, NeedSP);
+ } else {
+ CCInfo.AllocateReg(Info->getScratchRSrcReg());
+ CCInfo.AllocateReg(Info->getScratchWaveOffsetReg());
+ CCInfo.AllocateReg(Info->getFrameOffsetReg());
+
+ if (NeedSP) {
+ unsigned StackPtrReg = findFirstFreeSGPR(CCInfo);
+ CCInfo.AllocateReg(StackPtrReg);
+ Info->setStackPtrOffsetReg(StackPtrReg);
+ }
+ }
return Chains.empty() ? Chain :
DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
+// TODO: If return values can't fit in registers, we should return as many as
+// possible in registers before passing on stack.
+bool SITargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv,
+ MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ // Replacing returns with sret/stack usage doesn't make sense for shaders.
+ // FIXME: Also sort of a workaround for custom vector splitting in LowerReturn
+ // for shaders. Vector types should be explicitly handled by CC.
+ if (AMDGPU::isEntryFunctionCC(CallConv))
+ return true;
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
+}
+
SDValue
SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -1336,11 +1434,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- if (!AMDGPU::isShader(CallConv))
+ if (AMDGPU::isKernel(CallConv)) {
return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
OutVals, DL, DAG);
+ }
+
+ bool IsShader = AMDGPU::isShader(CallConv);
Info->setIfReturnsVoid(Outs.size() == 0);
+ bool IsWaveEnd = Info->returnsVoid() && IsShader;
SmallVector<ISD::OutputArg, 48> Splits;
SmallVector<SDValue, 48> SplitVals;
@@ -1349,7 +1451,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
const ISD::OutputArg &Out = Outs[i];
- if (Out.VT.isVector()) {
+ if (IsShader && Out.VT.isVector()) {
MVT VT = Out.VT.getVectorElementType();
ISD::OutputArg NewOut = Out;
NewOut.Flags.setSplit();
@@ -1380,29 +1482,58 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
*DAG.getContext());
// Analyze outgoing return values.
- AnalyzeReturn(CCInfo, Splits);
+ CCInfo.AnalyzeReturn(Splits, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 48> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ // Add return address for callable functions.
+ if (!Info->isEntryFunction()) {
+ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
+ SDValue ReturnAddrReg = CreateLiveInRegister(
+ DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
+
+ // FIXME: Should be able to use a vreg here, but need a way to prevent it
+  // from being allocated to a CSR.
+
+ SDValue PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
+ MVT::i64);
+
+ Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, Flag);
+ Flag = Chain.getValue(1);
+
+ RetOps.push_back(PhysReturnAddrReg);
+ }
+
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
+ // TODO: Partially return in registers if return values don't fit.
SDValue Arg = SplitVals[realRVLocIdx];
// Copied from other backends.
switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ default:
+ llvm_unreachable("Unknown loc info!");
}
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
@@ -1410,12 +1541,16 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ // FIXME: Does sret work properly?
+
// Update chain and glue.
RetOps[0] = Chain;
if (Flag.getNode())
RetOps.push_back(Flag);
- unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN_TO_EPILOG;
+ unsigned Opc = AMDGPUISD::ENDPGM;
+ if (!IsWaveEnd)
+ Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 046e677756d..e6883774749 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -28,6 +28,10 @@ class SITargetLowering final : public AMDGPUTargetLowering {
uint64_t Offset, bool Signed,
const ISD::InputArg *Arg = nullptr) const;
+ SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
+ const SDLoc &SL, SDValue Chain,
+ const ISD::InputArg &Arg) const;
+
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const override;
SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
@@ -177,7 +181,12 @@ public:
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index adebb8c4a1c..18b197ddb7a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -80,17 +80,22 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
WavesPerEU = ST.getWavesPerEU(*F);
- // Non-entry functions have no special inputs for now.
- // TODO: Return early for non-entry CCs.
+ if (!isEntryFunction()) {
+    // Non-entry functions have no special inputs for now, other than registers
+    // required for scratch access.
+ ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+ ScratchWaveOffsetReg = AMDGPU::SGPR4;
+ FrameOffsetReg = AMDGPU::SGPR5;
+ return;
+ }
CallingConv::ID CC = F->getCallingConv();
- if (CC == CallingConv::AMDGPU_PS)
- PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
-
- if (AMDGPU::isKernel(CC)) {
+ if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
+ } else if (CC == CallingConv::AMDGPU_PS) {
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
}
if (ST.debuggerEmitPrologue()) {
@@ -120,7 +125,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
- bool HasStackObjects = FrameInfo.hasStackObjects();
+ bool HasStackObjects = FrameInfo.hasStackObjects() || FrameInfo.hasCalls();
if (HasStackObjects || MaySpill) {
PrivateSegmentWaveByteOffset = true;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index dc9f509e60a..348bb4fa026 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -388,9 +388,8 @@ public:
void setScratchWaveOffsetReg(unsigned Reg) {
assert(Reg != AMDGPU::NoRegister && "Should never be unset");
ScratchWaveOffsetReg = Reg;
-
- // FIXME: Only for entry functions.
- FrameOffsetReg = ScratchWaveOffsetReg;
+ if (isEntryFunction())
+ FrameOffsetReg = ScratchWaveOffsetReg;
}
unsigned getQueuePtrUserSGPR() const {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 676c508598c..6fb01a09fe1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -117,11 +117,7 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
-unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
- const MachineFunction &MF) const {
-
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- unsigned RegCount = ST.getMaxNumSGPRs(MF);
+static unsigned findPrivateSegmentWaveByteOffsetRegIndex(unsigned RegCount) {
unsigned Reg;
// Try to place it in a hole after PrivateSegmentBufferReg.
@@ -134,9 +130,22 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
// wave offset before it.
Reg = RegCount - 5;
}
+
+ return Reg;
+}
+
+unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
+ const MachineFunction &MF) const {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ unsigned Reg = findPrivateSegmentWaveByteOffsetRegIndex(ST.getMaxNumSGPRs(MF));
return AMDGPU::SGPR_32RegClass.getRegister(Reg);
}
+unsigned SIRegisterInfo::reservedStackPtrOffsetReg(
+ const MachineFunction &MF) const {
+ return AMDGPU::SGPR32;
+}
+
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
@@ -198,15 +207,33 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
}
+ unsigned StackPtrReg = MFI->getStackPtrOffsetReg();
+ if (StackPtrReg != AMDGPU::NoRegister) {
+ reserveRegisterTuples(Reserved, StackPtrReg);
+ assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
+ }
+
+ unsigned FrameReg = MFI->getFrameOffsetReg();
+ if (FrameReg != AMDGPU::NoRegister) {
+ reserveRegisterTuples(Reserved, FrameReg);
+ assert(!isSubRegister(ScratchRSrcReg, FrameReg));
+ }
+
return Reserved;
}
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
- return Fn.getFrameInfo().hasStackObjects();
+ const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
+ if (Info->isEntryFunction()) {
+ const MachineFrameInfo &MFI = Fn.getFrameInfo();
+ return MFI.hasStackObjects() || MFI.hasCalls();
+ }
+
+ // May need scavenger for dealing with callee saved registers.
+ return true;
}
-bool
-SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
+bool SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
return MF.getFrameInfo().hasStackObjects();
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 679ed229758..b91cdddc552 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -17,6 +17,7 @@
#include "AMDGPURegisterInfo.h"
#include "SIDefines.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
@@ -57,8 +58,16 @@ public:
unsigned reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const;
+ unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const;
+
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID) const override;
+
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
@@ -228,6 +237,11 @@ public:
const int *getRegUnitPressureSets(unsigned RegUnit) const override;
+ unsigned getReturnAddressReg(const MachineFunction &MF) const {
+ // Not a callee saved register.
+ return AMDGPU::SGPR30_SGPR31;
+ }
+
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 593439c2a3c..f2d8b6f7b7a 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -186,11 +186,23 @@ def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">;
def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">;
def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64">;
-let isTerminator = 1, isBarrier = 1,
- isBranch = 1, isIndirectBranch = 1 in {
+let isTerminator = 1, isBarrier = 1, SchedRW = [WriteBranch] in {
+
+let isBranch = 1, isIndirectBranch = 1 in {
def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
+} // End isBranch = 1, isIndirectBranch = 1
+
+let isReturn = 1 in {
+// Define variant marked as return rather than branch.
+def S_SETPC_B64_return : SOP1_1<"", [(AMDGPUret_flag i64:$src0)]>;
+}
+} // End isTerminator = 1, isBarrier = 1
+
+let isCall = 1 in {
+def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64"
+>;
}
-def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64">;
+
def S_RFE_B64 : SOP1_1 <"s_rfe_b64">;
let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d565c84bfed..2abd4afad3b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -518,7 +518,18 @@ bool isCompute(CallingConv::ID cc) {
}
bool isEntryFunctionCC(CallingConv::ID CC) {
- return true;
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ return true;
+ default:
+ return false;
+ }
}
bool isSI(const MCSubtargetInfo &STI) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index d6c836eb748..8e74aa2cc9a 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -262,7 +262,6 @@ bool isEntryFunctionCC(CallingConv::ID CC);
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
switch (CC) {
- case CallingConv::C:
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
return true;