summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 14
-rw-r--r-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3
-rw-r--r-- llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll | 70
3 files changed, 81 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 74dac6561cf..3896bcc0b0e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -358,9 +358,11 @@ unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
if (WavesPerEU >= getMaxWavesPerEU(Features))
return 0;
- unsigned MinNumSGPRs =
- alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
- getSGPRAllocGranule(Features)) + 1;
+
+ unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
+ if (Features.test(FeatureTrapHandler))
+ MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}
@@ -369,11 +371,13 @@ unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
assert(WavesPerEU != 0);
IsaVersion Version = getIsaVersion(Features);
- unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
- getSGPRAllocGranule(Features));
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
+ if (Features.test(FeatureTrapHandler))
+ MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 5459ddfc7ef..54932d2bd3e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,7 +42,8 @@ namespace IsaInfo {
enum {
// The closed Vulkan driver sets 96, which limits the wave count to 8 but
// doesn't spill SGPRs as much as when 80 is set.
- FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
+ FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
+ TRAP_NUM_SGPRS = 16
};
/// Instruction set architecture version.
diff --git a/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll b/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll
new file mode 100644
index 00000000000..21bb6513dd8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-ENABLE
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-DISABLE
+
+; GCN-LABEL: {{^}}amdhsa_trap_num_sgprs
+; TRAP-HANDLER-ENABLE: NumSgprs: 60
+; TRAP-HANDLER-DISABLE: NumSgprs: 76
+define amdgpu_kernel void @amdhsa_trap_num_sgprs(
+ i32 addrspace(1)* %out0, i32 %in0,
+ i32 addrspace(1)* %out1, i32 %in1,
+ i32 addrspace(1)* %out2, i32 %in2,
+ i32 addrspace(1)* %out3, i32 %in3,
+ i32 addrspace(1)* %out4, i32 %in4,
+ i32 addrspace(1)* %out5, i32 %in5,
+ i32 addrspace(1)* %out6, i32 %in6,
+ i32 addrspace(1)* %out7, i32 %in7,
+ i32 addrspace(1)* %out8, i32 %in8,
+ i32 addrspace(1)* %out9, i32 %in9,
+ i32 addrspace(1)* %out10, i32 %in10,
+ i32 addrspace(1)* %out11, i32 %in11,
+ i32 addrspace(1)* %out12, i32 %in12,
+ i32 addrspace(1)* %out13, i32 %in13,
+ i32 addrspace(1)* %out14, i32 %in14,
+ i32 addrspace(1)* %out15, i32 %in15,
+ i32 addrspace(1)* %out16, i32 %in16,
+ i32 addrspace(1)* %out17, i32 %in17,
+ i32 addrspace(1)* %out18, i32 %in18,
+ i32 addrspace(1)* %out19, i32 %in19,
+ i32 addrspace(1)* %out20, i32 %in20,
+ i32 addrspace(1)* %out21, i32 %in21,
+ i32 addrspace(1)* %out22, i32 %in22,
+ i32 addrspace(1)* %out23, i32 %in23,
+ i32 addrspace(1)* %out24, i32 %in24,
+ i32 addrspace(1)* %out25, i32 %in25,
+ i32 addrspace(1)* %out26, i32 %in26,
+ i32 addrspace(1)* %out27, i32 %in27,
+ i32 addrspace(1)* %out28, i32 %in28,
+ i32 addrspace(1)* %out29, i32 %in29) {
+entry:
+ store i32 %in0, i32 addrspace(1)* %out0
+ store i32 %in1, i32 addrspace(1)* %out1
+ store i32 %in2, i32 addrspace(1)* %out2
+ store i32 %in3, i32 addrspace(1)* %out3
+ store i32 %in4, i32 addrspace(1)* %out4
+ store i32 %in5, i32 addrspace(1)* %out5
+ store i32 %in6, i32 addrspace(1)* %out6
+ store i32 %in7, i32 addrspace(1)* %out7
+ store i32 %in8, i32 addrspace(1)* %out8
+ store i32 %in9, i32 addrspace(1)* %out9
+ store i32 %in10, i32 addrspace(1)* %out10
+ store i32 %in11, i32 addrspace(1)* %out11
+ store i32 %in12, i32 addrspace(1)* %out12
+ store i32 %in13, i32 addrspace(1)* %out13
+ store i32 %in14, i32 addrspace(1)* %out14
+ store i32 %in15, i32 addrspace(1)* %out15
+ store i32 %in16, i32 addrspace(1)* %out16
+ store i32 %in17, i32 addrspace(1)* %out17
+ store i32 %in18, i32 addrspace(1)* %out18
+ store i32 %in19, i32 addrspace(1)* %out19
+ store i32 %in20, i32 addrspace(1)* %out20
+ store i32 %in21, i32 addrspace(1)* %out21
+ store i32 %in22, i32 addrspace(1)* %out22
+ store i32 %in23, i32 addrspace(1)* %out23
+ store i32 %in24, i32 addrspace(1)* %out24
+ store i32 %in25, i32 addrspace(1)* %out25
+ store i32 %in26, i32 addrspace(1)* %out26
+ store i32 %in27, i32 addrspace(1)* %out27
+ store i32 %in28, i32 addrspace(1)* %out28
+ store i32 %in29, i32 addrspace(1)* %out29
+ ret void
+}
OpenPOWER on IntegriCloud