summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp        |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 40
-rw-r--r--  llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h    | 12
-rw-r--r--  llvm/test/CodeGen/AMDGPU/calling-conventions.ll  | 14
4 files changed, 45 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4696f645cf8..eda825d8c6e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1171,7 +1171,7 @@ SDValue SITargetLowering::LowerFormalArguments(
*DAG.getContext());
bool IsShader = AMDGPU::isShader(CallConv);
- bool IsKernel = !IsShader;
+ bool IsKernel = AMDGPU::isKernel(CallConv);
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
if (IsShader) {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index debb42fc0fc..8e612d2ddfd 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -75,34 +75,48 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PrivateMemoryInputPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
+ FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
+ WavesPerEU = ST.getWavesPerEU(*F);
- PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
+ // Non-entry functions have no special inputs for now.
+ // TODO: Return early for non-entry CCs.
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC == CallingConv::AMDGPU_PS)
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
- if (!AMDGPU::isShader(F->getCallingConv())) {
+ if (AMDGPU::isKernel(CC)) {
KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
}
- if (F->hasFnAttribute("amdgpu-work-group-id-y") || ST.debuggerEmitPrologue())
+ if (ST.debuggerEmitPrologue()) {
+ // Enable everything.
WorkGroupIDY = true;
-
- if (F->hasFnAttribute("amdgpu-work-group-id-z") || ST.debuggerEmitPrologue())
WorkGroupIDZ = true;
-
- if (F->hasFnAttribute("amdgpu-work-item-id-y") || ST.debuggerEmitPrologue())
WorkItemIDY = true;
-
- if (F->hasFnAttribute("amdgpu-work-item-id-z") || ST.debuggerEmitPrologue())
WorkItemIDZ = true;
+ } else {
+ if (F->hasFnAttribute("amdgpu-work-group-id-y"))
+ WorkGroupIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-group-id-z"))
+ WorkGroupIDZ = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-y"))
+ WorkItemIDY = true;
+
+ if (F->hasFnAttribute("amdgpu-work-item-id-z"))
+ WorkItemIDZ = true;
+ }
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
if (WorkItemIDZ)
WorkItemIDY = true;
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
bool HasStackObjects = FrameInfo.hasStackObjects();
@@ -129,12 +143,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
// We don't need to worry about accessing spills with flat instructions.
// TODO: On VI where we must use flat for global, we should be able to omit
// this if it is never used for generic access.
- if (HasStackObjects && ST.getGeneration() >= SISubtarget::SEA_ISLANDS &&
- ST.isAmdHsaOS())
+ if (HasStackObjects && ST.hasFlatAddressSpace() && ST.isAmdHsaOS())
FlatScratchInit = true;
-
- FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
- WavesPerEU = ST.getWavesPerEU(*F);
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 4a43ecda177..d6c836eb748 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -258,6 +258,18 @@ bool isCompute(CallingConv::ID CC);
LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);
+// FIXME: Remove this when calling conventions cleaned up
+LLVM_READNONE
+inline bool isKernel(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ default:
+ return false;
+ }
+}
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 55c2b503430..677147b6f4e 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -12,13 +12,13 @@ entry:
}
; FIXME: This is treated like a kernel
-; GCN-LABEL: {{^}}func:
-; GCN: s_endpgm
-define spir_func void @func(i32 addrspace(1)* %out) {
-entry:
- store i32 0, i32 addrspace(1)* %out
- ret void
-}
+; XGCN-LABEL: {{^}}func:
+; XGCN: s_endpgm
+; define spir_func void @func(i32 addrspace(1)* %out) {
+; entry:
+; store i32 0, i32 addrspace(1)* %out
+; ret void
+; }
; GCN-LABEL: {{^}}ps_ret_cc_f16:
; SI: v_cvt_f16_f32_e32 v0, v0
OpenPOWER on IntegriCloud