diff options
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll | 19 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/global_atomics.ll | 78 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/hsa.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/private-memory.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll | 26 |
7 files changed, 40 insertions, 99 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index 4760bce2649..6fd58afdd44 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -188,8 +188,6 @@ done: } ; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32: -; VI-DAG: s_movk_i32 flat_scratch_lo, 0x0 -; VI-DAG: s_movk_i32 flat_scratch_hi, 0x0 ; GCN: s_and_saveexec_b64 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] diff --git a/llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll b/llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll index 1a37e3c75fa..8227d4c873e 100644 --- a/llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll +++ b/llvm/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll @@ -1,11 +1,22 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck -check-prefix=HSA-DEFAULT %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA-NODEFAULT %s -; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri | FileCheck -check-prefix=NOHSA-DEFAULT %s -; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s +; There are no stack objects even though flat is used by default, so +; flat_scratch_init should be disabled. + +; ALL-LABEL: {{^}}test: +; HSA: .amd_kernel_code_t +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: .end_amd_kernel_code_t + +; ALL-NOT: flat_scr + ; HSA-DEFAULT: flat_store_dword ; HSA-NODEFAULT: buffer_store_dword + ; NOHSA-DEFAULT: buffer_store_dword ; NOHSA-NODEFAULT: flat_store_dword define void @test(i32 addrspace(1)* %out) { diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll index 6786e4a2f37..a92ee89d798 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll @@ -24,8 +24,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset: ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { @@ -38,8 +36,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset: ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -71,8 +67,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_addr64: ; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -83,8 +77,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64: ; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -117,8 +109,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset: ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -130,8 +120,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset: ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_and [[RET:v[0-9]]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -163,8 +151,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_addr64: ; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -175,8 +161,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64: ; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -209,8 +193,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset: ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -222,8 +204,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset: ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -255,8 +235,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64: ; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -267,8 +245,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64: ; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -301,8 +277,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset: ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -314,8 +288,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset: ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -347,8 +319,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_addr64: ; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -359,8 +329,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64: ; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -393,8 +361,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset: ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -406,8 +372,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset: ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -439,8 +403,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64: ; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -451,8 +413,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64: ; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -485,8 +445,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset: ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -498,8 +456,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset: ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -531,8 +487,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_addr64: ; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -543,8 +497,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64: ; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -577,8 +529,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset: ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -590,8 +540,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -623,8 +571,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64: ; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -635,8 +581,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64: ; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -669,8 +613,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset: ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -682,8 +624,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset: ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -715,8 +655,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_addr64: ; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -727,8 +665,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64: ; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -771,8 +707,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset: ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -804,8 +738,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64: ; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -816,8 +748,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64: ; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -850,8 +780,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset: ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -863,8 +791,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset: ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { @@ -896,8 +822,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64: ; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}} define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { entry: @@ -908,8 +832,6 @@ entry: ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64: ; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}} -; VI: s_movk_i32 flat_scratch_lo, 0x0 -; VI: s_movk_i32 flat_scratch_hi, 0x0 ; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) { diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll index c089dfd9a97..14c130a8190 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa.ll @@ -28,7 +28,7 @@ ; ELF: Symbol { ; ELF: Name: simple -; ELF: Size: 296 +; ELF: Size: 288 ; ELF: Type: AMDGPU_HSA_KERNEL (0xA) ; ELF: } diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll index da40c8593e9..2a120bdd57e 100644 --- a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll +++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -17,7 +17,7 @@ ; GCNHSA: .amd_kernel_code_t ; GCNHSA: compute_pgm_rsrc2_scratch_en = 1 -; GCNHSA: compute_pgm_rsrc2_user_sgpr = 6 +; GCNHSA: compute_pgm_rsrc2_user_sgpr = 8 ; GCNHSA: compute_pgm_rsrc2_tgid_x_en = 1 ; GCNHSA: compute_pgm_rsrc2_tgid_y_en = 0 ; GCNHSA: compute_pgm_rsrc2_tgid_z_en = 0 @@ -29,7 +29,7 @@ ; GCNHSA: enable_sgpr_queue_ptr = 0 ; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1 ; GCNHSA: enable_sgpr_dispatch_id = 0 -; GCNHSA: enable_sgpr_flat_scratch_init = 0 +; GCNHSA: enable_sgpr_flat_scratch_init = 1 ; GCNHSA: enable_sgpr_private_segment_size = 0 ; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0 ; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0 @@ -39,8 +39,8 @@ ; GCNHSA: .end_amd_kernel_code_t -; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen -; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen +; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen +; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s9 offen ; Scratch size = alloca size + emergency stack slot ; ALL: ; ScratchSize: 32772 diff --git a/llvm/test/CodeGen/AMDGPU/private-memory.ll b/llvm/test/CodeGen/AMDGPU/private-memory.ll index b87944d7051..1597a2ad365 100644 --- a/llvm/test/CodeGen/AMDGPU/private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/private-memory.ll @@ -41,6 +41,10 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; HSA-ALLOCA: workitem_private_segment_byte_size = 24 ; HSA-ALLOCA: .end_amd_kernel_code_t +; HSA-ALLOCA: s_mov_b32 flat_scratch_lo, s7 +; HSA-ALLOCA: s_add_u32 s6, s6, s9 +; HSA-ALLOCA: s_lshr_b32 flat_scratch_hi, s6, 8 + ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll index 69147071348..d8d83dd1b07 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s -; XUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA %s -; XUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s +; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s ; This ends up using all 256 registers and requires register ; scavenging which will fail to find an unsued register. @@ -13,12 +13,18 @@ ; GCN-LABEL: {{^}}spill_vgpr_compute: -; GCN: s_mov_b32 s16, s3 -; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s14, -1 -; SI-NEXT: s_mov_b32 s15, 0x98f000 -; VI-NEXT: s_mov_b32 s15, 0x980000 +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: workitem_private_segment_byte_size = 1024 + +; GCN-NOT: flat_scr + +; GCNMESA: s_mov_b32 s16, s3 +; GCNMESA: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GCNMESA-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCNMESA-NEXT: s_mov_b32 s14, -1 +; SIMESA-NEXT: s_mov_b32 s15, 0x98f000 +; VIMESA-NEXT: s_mov_b32 s15, 0x980000 ; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill |