diff options
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.td                  |  7 +++++++
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp         |  2 +-
-rw-r--r--  llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll  | 12 ++++++------
-rw-r--r--  llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll      |  4 ++++
4 files changed, 18 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index fe67f59feb6..f057a3a8bd9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -73,6 +73,13 @@ def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
   "Support unaligned scratch loads and stores"
 >;
 
+// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
+// XNACK. The current default kernel driver setting is:
+// - graphics ring: XNACK disabled
+// - compute ring: XNACK enabled
+//
+// If XNACK is enabled, the VMEM latency can be worse.
+// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
 def FeatureXNACK : SubtargetFeature<"xnack",
   "EnableXNACK",
   "true",
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 0fdd203b3d0..41633a2b6a0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1188,7 +1188,7 @@ unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
       return 4; // FLAT_SCRATCH, VCC (in that order)
   }
 
-  if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+  if (ST.isXNACKEnabled())
     return 4; // XNACK, VCC (in that order)
 
   return 2; // VCC.
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
index da49517e003..e4f6e72e697 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
@@ -9,10 +9,10 @@
 
 ; FIXME: Should be ablo to skip this copying of the private segment
 ; buffer because all the SGPR spills are to VGPRs.
-; ALL: s_mov_b64 s[6:7], s[2:3]
-; ALL: s_mov_b64 s[4:5], s[0:1]
+; ALL: s_mov_b64 s[10:11], s[2:3]
+; ALL: s_mov_b64 s[8:9], s[0:1]
 ; ALL: SGPRBlocks: 1
-; ALL: NumSGPRsForWavesPerEU: 12
+; ALL: NumSGPRsForWavesPerEU: 14
 define void @max_12_sgprs(i32 addrspace(1)* %out1,
 
                           i32 addrspace(1)* %out2,
@@ -46,9 +46,9 @@ define void @max_12_sgprs(i32 addrspace(1)* %out1,
 ; TOSGPR: SGPRBlocks: 1
 ; TOSGPR: NumSGPRsForWavesPerEU: 16
 
-; TOSMEM: s_mov_b64 s[6:7], s[2:3]
-; TOSMEM: s_mov_b32 s9, s13
-; TOSMEM: s_mov_b64 s[4:5], s[0:1]
+; TOSMEM: s_mov_b64 s[10:11], s[2:3]
+; TOSMEM: s_mov_b64 s[8:9], s[0:1]
+; TOSMEM: s_mov_b32 s7, s13
 ; TOSMEM: SGPRBlocks: 1
 ; TOSMEM: NumSGPRsForWavesPerEU: 16
 
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll
index 1ee9100c2eb..f15cbef56b1 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-reg.ll
@@ -1,5 +1,9 @@
 ; RUN: llc -march=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
+
+; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=stoney -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefix=VI-NOXNACK -check-prefix=GCN %s
+
 ; RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=stoney -verify-machineinstrs < %s | FileCheck -check-prefix=VI-XNACK -check-prefix=GCN %s
 