diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/local-atomics.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/local-atomics.ll | 214 |
1 files changed, 190 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics.ll b/llvm/test/CodeGen/AMDGPU/local-atomics.ll index de029d964b0..d2167f5a730 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomics.ll @@ -1,13 +1,18 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CIVI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI,SICIVI,FUNC %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s +; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32: ; EG: LDS_WRXCHG_RET * -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -18,6 +23,9 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 a } ; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_WRXCHG_RET * ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -31,9 +39,13 @@ define amdgpu_kernel void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out ; XXX - Is it really necessary to load 4 into VGPR? ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32: ; EG: LDS_ADD_RET * -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm @@ -44,6 +56,9 @@ define amdgpu_kernel void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_ADD_RET * ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -55,6 +70,9 @@ define amdgpu_kernel void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_ADD_RET * ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 @@ -70,7 +88,11 @@ define amdgpu_kernel void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1) ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32: ; EG: LDS_ADD_RET * -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -81,7 +103,11 @@ define amdgpu_kernel void @lds_atomic_add1_ret_i32(i32 addrspace(1)* %out, i32 a ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_offset: ; EG: LDS_ADD_RET * -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -92,6 +118,9 @@ define amdgpu_kernel void @lds_atomic_add1_ret_i32_offset(i32 addrspace(1)* %out } ; FUNC-LABEL: {{^}}lds_atomic_add1_ret_i32_bad_si_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_ADD_RET * ; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 @@ -107,6 +136,10 @@ define amdgpu_kernel void @lds_atomic_add1_ret_i32_bad_si_offset(i32 addrspace(1 ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32: ; EG: LDS_SUB_RET * + +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_rtn_u32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -117,6 +150,10 @@ define amdgpu_kernel void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 ad ; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset: ; EG: LDS_SUB_RET * + +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -128,7 +165,11 @@ define amdgpu_kernel void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32: ; EG: LDS_SUB_RET * -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -139,7 +180,11 @@ define amdgpu_kernel void @lds_atomic_sub1_ret_i32(i32 addrspace(1)* %out, i32 a ; FUNC-LABEL: {{^}}lds_atomic_sub1_ret_i32_offset: ; EG: LDS_SUB_RET * -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[ONE]] offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -151,6 +196,10 @@ define amdgpu_kernel void @lds_atomic_sub1_ret_i32_offset(i32 addrspace(1)* %out ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32: ; EG: LDS_AND_RET * + +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_rtn_b32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { @@ -160,6 +209,9 @@ define amdgpu_kernel void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_AND_RET * ; GCN: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -171,6 +223,9 @@ define amdgpu_kernel void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_OR_RET * ; GCN: ds_or_rtn_b32 ; GCN: s_endpgm @@ -181,6 +236,9 @@ define amdgpu_kernel void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 add } ; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_OR_RET * ; GCN: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -192,6 +250,9 @@ define amdgpu_kernel void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_XOR_RET * ; GCN: ds_xor_rtn_b32 ; GCN: s_endpgm @@ -202,6 +263,9 @@ define amdgpu_kernel void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_XOR_RET * ; GCN: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -221,6 +285,9 @@ define amdgpu_kernel void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, ; } ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MIN_INT_RET * ; GCN: ds_min_rtn_i32 ; GCN: s_endpgm @@ -231,6 +298,9 @@ define amdgpu_kernel void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MIN_INT_RET * ; GCN: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -242,6 +312,9 @@ define amdgpu_kernel void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MAX_INT_RET * ; GCN: ds_max_rtn_i32 ; GCN: s_endpgm @@ -252,6 +325,9 @@ define amdgpu_kernel void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 ad } ; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MAX_INT_RET * ; GCN: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -263,6 +339,9 @@ define amdgpu_kernel void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, } ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MIN_UINT_RET * ; GCN: ds_min_rtn_u32 ; GCN: s_endpgm @@ -273,6 +352,9 @@ define amdgpu_kernel void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 a } ; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MIN_UINT_RET * ; GCN: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -284,6 +366,9 @@ define amdgpu_kernel void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out } ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MAX_UINT_RET * ; GCN: ds_max_rtn_u32 ; GCN: s_endpgm @@ -294,6 +379,9 @@ define amdgpu_kernel void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 a } ; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; EG: LDS_MAX_UINT_RET * ; GCN: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -305,9 +393,12 @@ define amdgpu_kernel void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32: -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -316,6 +407,9 @@ define amdgpu_kernel void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -325,9 +419,12 @@ define amdgpu_kernel void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32: -; GCN: s_load_dword [[SPTR:s[0-9]+]], -; GCN: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 -; GCN: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: s_load_dword [[SPTR:s[0-9]+]], +; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4 +; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]] ; GCN: ds_add_u32 [[VPTR]], [[DATA]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -336,6 +433,9 @@ define amdgpu_kernel void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -345,6 +445,9 @@ define amdgpu_kernel void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -357,7 +460,10 @@ define amdgpu_kernel void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace( } ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32: -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -366,7 +472,10 @@ define amdgpu_kernel void @lds_atomic_add1_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_offset: -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_add_u32 v{{[0-9]+}}, [[ONE]] offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -376,6 +485,9 @@ define amdgpu_kernel void @lds_atomic_add1_noret_i32_offset(i32 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_add1_noret_i32_bad_si_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} ; CIVI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm @@ -388,6 +500,9 @@ define amdgpu_kernel void @lds_atomic_add1_noret_i32_bad_si_offset(i32 addrspace } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_u32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -396,6 +511,9 @@ define amdgpu_kernel void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -405,7 +523,10 @@ define amdgpu_kernel void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32: -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -414,7 +535,10 @@ define amdgpu_kernel void @lds_atomic_sub1_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_sub1_noret_i32_offset: -; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} +; SICIVI-DAG: s_mov_b32 m0 +; GFX9-NOT: m0 + +; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1{{$}} ; GCN: ds_sub_u32 v{{[0-9]+}}, [[ONE]] offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -424,6 +548,9 @@ define amdgpu_kernel void @lds_atomic_sub1_noret_i32_offset(i32 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_b32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -432,6 +559,9 @@ define amdgpu_kernel void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -441,6 +571,9 @@ define amdgpu_kernel void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_or_b32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -449,6 +582,9 @@ define amdgpu_kernel void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounw } ; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -458,6 +594,9 @@ define amdgpu_kernel void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_xor_b32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -466,6 +605,9 @@ define amdgpu_kernel void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -482,6 +624,9 @@ define amdgpu_kernel void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %pt ; } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_i32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -490,6 +635,9 @@ define amdgpu_kernel void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -499,6 +647,9 @@ define amdgpu_kernel void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_i32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -507,6 +658,9 @@ define amdgpu_kernel void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) noun } ; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -516,6 +670,9 @@ define amdgpu_kernel void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %pt } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_u32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -524,6 +681,9 @@ define amdgpu_kernel void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { @@ -533,6 +693,9 @@ define amdgpu_kernel void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %p } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_u32 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind { @@ -541,6 +704,9 @@ define amdgpu_kernel void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nou } ; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset: +; SICIVI: s_mov_b32 m0 +; GFX9-NOT: m0 + ; GCN: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 ; GCN: s_endpgm define amdgpu_kernel void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { |

