diff options
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp | 7 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ds_write2.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/ds_write2st64.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/global-saddr.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/madak.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory_clause.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll | 2 | 
11 files changed, 19 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp b/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp index 3da043f3709..ee39eb04d83 100644 --- a/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixupVectorISel.cpp @@ -43,6 +43,11 @@  using namespace llvm; +static cl::opt<bool> EnableGlobalSGPRAddr( +  "amdgpu-enable-global-sgpr-addr", +  cl::desc("Enable use of SGPR regs for GLOBAL LOAD/STORE instructions"), +  cl::init(false)); +  STATISTIC(NumSGPRGlobalOccurs, "Number of global ld/st opportunities");  STATISTIC(NumSGPRGlobalSaddrs, "Number of global sgpr instructions converted"); @@ -155,6 +160,8 @@ static bool fixupGlobalSaddr(MachineBasicBlock &MBB,                               const GCNSubtarget &ST,                               const SIInstrInfo *TII,                               const SIRegisterInfo *TRI) { +  if (!EnableGlobalSGPRAddr) +    return false;    bool FuncModified = false;    MachineBasicBlock::iterator I, Next;    for (I = MBB.begin(); I != MBB.end(); I = Next) { diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll index e0bde491619..03436f6b3a3 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll @@ -1,5 +1,5 @@  ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+flat-for-global -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9 %s  @lds = addrspace(3) global [512 x float] undef, align 4  @lds.f64 = addrspace(3) global [512 x double] undef, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll index 6c796749356..2a405352f25 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll @@ -1,5 +1,5 @@  ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s  @lds = addrspace(3) global [512 x float] undef, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir b/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir index e5df4f0d11d..8ac7c3e14a1 100644 --- a/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir +++ b/llvm/test/CodeGen/AMDGPU/global-load-store-atomics.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-fixup-vector-isel %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-fixup-vector-isel -amdgpu-enable-global-sgpr-addr %s -o - | FileCheck -check-prefix=GCN %s  # Coverage tests for GLOBAL_* to their _SADDR equivalent. diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr.ll b/llvm/test/CodeGen/AMDGPU/global-saddr.ll index fff10faa72e..b21fd985226 100644 --- a/llvm/test/CodeGen/AMDGPU/global-saddr.ll +++ b/llvm/test/CodeGen/AMDGPU/global-saddr.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefix=GFX9 %s  ; Test for a conv2d like sequence of loads. diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll index 0b4242a2f4e..07ee65526c9 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-amdgpu-aa=0 -mattr=+flat-for-global,-fp64-fp16-denormals -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s  ; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr: diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll index 8edfe8ab539..df1ef1cba71 100644 --- a/llvm/test/CodeGen/AMDGPU/madak.ll +++ b/llvm/test/CodeGen/AMDGPU/madak.ll @@ -1,6 +1,6 @@  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s  ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9 %s  declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone  declare float @llvm.fabs.f32(float) nounwind readnone diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll index 856dc46b841..179cb3f625d 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-load.ll @@ -1,7 +1,7 @@  ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s  ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s  declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll index e8073ad75a3..87c43949df6 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-store.ll @@ -1,7 +1,7 @@  ; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s  ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck --check-prefixes=GCN,GFX9,GFX89 %s  declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll index 2bfe0d2b23d..9ae068a4340 100644 --- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll +++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -amdgpu-enable-global-sgpr-addr < %s | FileCheck -check-prefix=GCN %s  ; GCN-LABEL: {{^}}vector_clause:  ; GCN:      global_load_dwordx4 diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll index 7f11658cc6b..e9a6ba98942 100644 --- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -1,5 +1,5 @@  ; RUN: llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=bonaire -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s -; RUN: llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s +; RUN: llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=gfx900 -enable-amdgpu-aa=0 -verify-machineinstrs -enable-misched -enable-aa-sched-mi -amdgpu-enable-global-sgpr-addr < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s  declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)  declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)  | 

