summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaits.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h9
4 files changed, 19 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0c678c4b73b..4206e6fb869 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -893,6 +893,8 @@ SDValue SITargetLowering::LowerReturn(SDValue Chain,
return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
OutVals, DL, DAG);
+ Info->setIfReturnsVoid(Outs.size() == 0);
+
SmallVector<ISD::OutputArg, 48> Splits;
SmallVector<SDValue, 48> SplitVals;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index 821aada526c..d3b41797871 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -84,6 +84,9 @@ private:
bool LastInstWritesM0;
+ /// \brief Whether the machine function returns void
+ bool ReturnsVoid;
+
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -322,7 +325,9 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
const Counters &Required) {
// End of program? No need to wait on anything
- if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ // A function not returning void needs to wait, because other bytecode will
+ // be appended after it and we don't know what it will be.
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
return false;
// Figure out if the async instructions execute in order
@@ -465,6 +470,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
+ ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 878fd768bba..49677fc2b0a 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -47,6 +47,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
PSInputAddr(0),
+ ReturnsVoid(true),
LDSWaveSpillSize(0),
PSInputEna(0),
NumUserSGPRs(0),
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 61d575385ff..846ee5de057 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -59,6 +59,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
// Graphics info.
unsigned PSInputAddr;
+ bool ReturnsVoid;
public:
// FIXME: Make private
@@ -288,6 +289,14 @@ public:
PSInputAddr |= 1 << Index;
}
+ bool returnsVoid() const {
+ return ReturnsVoid;
+ }
+
+ void setIfReturnsVoid(bool Value) {
+ ReturnsVoid = Value;
+ }
+
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
OpenPOWER on IntegriCloud