summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-02-12 06:31:30 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-02-12 06:31:30 +0000
commit296b849163d3f3ed0c342c71c3d5800accda438c (patch)
treee218a2364e6080456137e4f2f25ed97107c5acdd /llvm/test/CodeGen/AMDGPU
parentf71d653879fb5e9cb2c13e060cbb080f6ab2ff4c (diff)
downloadbcm5719-llvm-296b849163d3f3ed0c342c71c3d5800accda438c.tar.gz
bcm5719-llvm-296b849163d3f3ed0c342c71c3d5800accda438c.zip
AMDGPU: Set flat_scratch from flat_scratch_init reg
This was hardcoded to the static private size, but this would be missing the offset and additional size for someday when we have dynamic sizing. Also stops always initializing flat_scratch even when unused. In the future we should stop emitting this unless flat instructions are used to access private memory. For example this will initialize it almost always on VI because flat is used for global access. llvm-svn: 260658
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll19
-rw-r--r--llvm/test/CodeGen/AMDGPU/global_atomics.ll78
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/private-memory.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll26
7 files changed, 40 insertions, 99 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 4760bce2649..6fd58afdd44 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -188,8 +188,6 @@ done:
}
; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
-; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0
-; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
diff --git a/llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll b/llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
index 1a37e3c75fa..8227d4c873e 100644
--- a/llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
@@ -1,11 +1,22 @@
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck -check-prefix=HSA-DEFAULT %s
-; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA-NODEFAULT %s
-; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri | FileCheck -check-prefix=NOHSA-DEFAULT %s
-; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
+; There are no stack objects even though flat is used by default, so
+; flat_scratch_init should be disabled.
+
+; ALL-LABEL: {{^}}test:
+; HSA: .amd_kernel_code_t
+; HSA: enable_sgpr_flat_scratch_init = 0
+; HSA: .end_amd_kernel_code_t
+
+; ALL-NOT: flat_scr
+
; HSA-DEFAULT: flat_store_dword
; HSA-NODEFAULT: buffer_store_dword
+
; NOHSA-DEFAULT: buffer_store_dword
; NOHSA-NODEFAULT: flat_store_dword
define void @test(i32 addrspace(1)* %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
index 6786e4a2f37..a92ee89d798 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
@@ -24,8 +24,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
@@ -38,8 +36,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -71,8 +67,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -83,8 +77,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -117,8 +109,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -130,8 +120,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -163,8 +151,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -175,8 +161,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -209,8 +193,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -222,8 +204,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -255,8 +235,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -267,8 +245,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -301,8 +277,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -314,8 +288,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -347,8 +319,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -359,8 +329,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -393,8 +361,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -406,8 +372,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -439,8 +403,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -451,8 +413,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -485,8 +445,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -498,8 +456,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -531,8 +487,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -543,8 +497,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -577,8 +529,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -590,8 +540,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -623,8 +571,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -635,8 +581,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -669,8 +613,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -682,8 +624,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -715,8 +655,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -727,8 +665,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -771,8 +707,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -804,8 +738,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -816,8 +748,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -850,8 +780,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -863,8 +791,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
@@ -896,8 +822,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
@@ -908,8 +832,6 @@ entry:
; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
-; VI: s_movk_i32 flat_scratch_lo, 0x0
-; VI: s_movk_i32 flat_scratch_hi, 0x0
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; GCN: buffer_store_dword [[RET]]
define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll
index c089dfd9a97..14c130a8190 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa.ll
@@ -28,7 +28,7 @@
; ELF: Symbol {
; ELF: Name: simple
-; ELF: Size: 296
+; ELF: Size: 288
; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
; ELF: }
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
index da40c8593e9..2a120bdd57e 100644
--- a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -17,7 +17,7 @@
; GCNHSA: .amd_kernel_code_t
; GCNHSA: compute_pgm_rsrc2_scratch_en = 1
-; GCNHSA: compute_pgm_rsrc2_user_sgpr = 6
+; GCNHSA: compute_pgm_rsrc2_user_sgpr = 8
; GCNHSA: compute_pgm_rsrc2_tgid_x_en = 1
; GCNHSA: compute_pgm_rsrc2_tgid_y_en = 0
; GCNHSA: compute_pgm_rsrc2_tgid_z_en = 0
@@ -29,7 +29,7 @@
; GCNHSA: enable_sgpr_queue_ptr = 0
; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1
; GCNHSA: enable_sgpr_dispatch_id = 0
-; GCNHSA: enable_sgpr_flat_scratch_init = 0
+; GCNHSA: enable_sgpr_flat_scratch_init = 1
; GCNHSA: enable_sgpr_private_segment_size = 0
; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0
; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0
@@ -39,8 +39,8 @@
; GCNHSA: .end_amd_kernel_code_t
-; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen
-; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen
+; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen
+; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen
; Scratch size = alloca size + emergency stack slot
; ALL: ; ScratchSize: 32772
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory.ll b/llvm/test/CodeGen/AMDGPU/private-memory.ll
index b87944d7051..1597a2ad365 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory.ll
@@ -41,6 +41,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; HSA-ALLOCA: workitem_private_segment_byte_size = 24
; HSA-ALLOCA: .end_amd_kernel_code_t
+; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7
+; HSA-ALLOCA: s_add_u32 s6, s6, s9
+; HSA-ALLOCA: s_lshr_b32 flat_scratch_hi, s6, 8
+
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index 69147071348..d8d83dd1b07 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
-; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; XUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA %s
-; XUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s
; This ends up using all 256 registers and requires register
; scavenging which will fail to find an unsued register.
@@ -13,12 +13,18 @@
; GCN-LABEL: {{^}}spill_vgpr_compute:
-; GCN: s_mov_b32 s16, s3
-; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
-; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
-; GCN-NEXT: s_mov_b32 s14, -1
-; SI-NEXT: s_mov_b32 s15, 0x98f000
-; VI-NEXT: s_mov_b32 s15, 0x980000
+; HSA: enable_sgpr_private_segment_buffer = 1
+; HSA: enable_sgpr_flat_scratch_init = 0
+; HSA: workitem_private_segment_byte_size = 1024
+
+; GCN-NOT: flat_scr
+
+; GCNMESA: s_mov_b32 s16, s3
+; GCNMESA: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCNMESA-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCNMESA-NEXT: s_mov_b32 s14, -1
+; SIMESA-NEXT: s_mov_b32 s15, 0x98f000
+; VIMESA-NEXT: s_mov_b32 s15, 0x980000
; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
OpenPOWER on IntegriCloud