| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-06-22 08:39:52 +0000 | 
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-06-22 08:39:52 +0000 | 
| commit | 3f8e7a3dbcb7dc7c28ee294b6fdaa0bd1f8abeec (patch) | |
| tree | 99cf952eebe87c96ac2d1e17f775ab08642f3947 /llvm/test/CodeGen/AMDGPU | |
| parent | ea19c9473c4237c598e8a773f84fdeb695724a63 (diff) | |
| download | bcm5719-llvm-3f8e7a3dbcb7dc7c28ee294b6fdaa0bd1f8abeec.tar.gz, bcm5719-llvm-3f8e7a3dbcb7dc7c28ee294b6fdaa0bd1f8abeec.zip | |
AMDGPU: Add patterns for i32/i64 local atomic load/store
Not sure why the 32/64 split is needed in the atomic_load/atomic_store
hierarchies. The regular PatFrags do this, but the existing handling
for global does not.
llvm-svn: 335325
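The pattern work itself lives in the AMDGPU .td files, which fall outside this diffstat (it is limited to the test directory). For orientation, here is a rough sketch of the shape such patterns take: the generic size-split fragments (atomic_load_32, atomic_load_64) from TargetSelectionDAG.td are restricted to the local address space and mapped onto DS instructions. The local-fragment and helper names below (atomic_load_32_local, DSReadPat) are illustrative stand-ins, not necessarily what the actual patch uses.

```tablegen
// Sketch only: restrict the generic size-split atomic load fragments
// to loads from the local (LDS) address space.
def atomic_load_32_local : PatFrag<(ops node:$ptr),
                                   (atomic_load_32 node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

def atomic_load_64_local : PatFrag<(ops node:$ptr),
                                   (atomic_load_64 node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

// Map each fragment onto the matching DS read; DSReadPat stands in for
// the backend's existing read-pattern helper class.
def : DSReadPat<DS_READ_B32, i32, atomic_load_32_local>;
def : DSReadPat<DS_READ_B64, i64, atomic_load_64_local>;
```

Stores would get the analogous atomic_store fragments wired to DS_WRITE_B32/DS_WRITE_B64, which is what the new tests below check for.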
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/atomic_load_local.ll | 52 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/atomic_store_local.ll | 53 |
2 files changed, 105 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
new file mode 100644
index 00000000000..edca16871ac
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) {
+  %load = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4
+  ret i32 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
+  %load = load atomic i32, i32 addrspace(3)* %gep monotonic, align 4
+  ret i32 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
+  %load = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8
+  ret i64 %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
+  %load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8
+  ret i64 %load
+}
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
new file mode 100644
index 00000000000..3b69070f3ea
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b32 v0, v1{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) {
+  store atomic i32 %val, i32 addrspace(3)* %ptr monotonic, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i32:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) {
+  %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16
+  store atomic i32 %val, i32 addrspace(3)* %gep monotonic, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) {
+  store atomic i64 %val, i64 addrspace(3)* %ptr monotonic, align 8
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i64:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @atomic_store_monotonic_offset_i64(i64 addrspace(3)* %ptr, i64 %val) {
+  %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
+  store atomic i64 %val, i64 addrspace(3)* %gep monotonic, align 8
+  ret void
+}
+
```
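For reference, the RUN lines above expand to ordinary llc | FileCheck pipelines, with %s replaced by the test file's path. A sketch of checking the GFX9 output of the load test by hand (path assumed relative to the source tree):

```sh
llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs \
    < llvm/test/CodeGen/AMDGPU/atomic_load_local.ll \
  | FileCheck -check-prefixes=GCN,GFX9 llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
```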

