diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2019-05-21 21:49:14 +0000 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2019-05-21 21:49:14 +0000 |
commit | 675307b1f193d537549e29b0c1e51b9062e6ed48 (patch) | |
tree | 4d0e6f10b35b4aeacbb2f032d56666bb10306b6d | |
parent | 21e8ec8d4f2785df816143b5a1362470336a4f76 (diff) | |
download | bcm5719-llvm-675307b1f193d537549e29b0c1e51b9062e6ed48.tar.gz bcm5719-llvm-675307b1f193d537549e29b0c1e51b9062e6ed48.zip |
[NFC][AMDGPU] Autogenerate llvm.amdgcn.s.barrier.ll test
llvm-svn: 361320
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | 103 |
1 file changed, 93 insertions, 10 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll index 5a29072a07c..20c84c5b632 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -1,15 +1,98 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=NOAUTO %s -; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=AUTO %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=NOAUTO %s -; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=AUTO %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,VARIANT0 %s +; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,VARIANT1 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,VARIANT2 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,VARIANT3 %s -; GCN-LABEL: {{^}}test_barrier: -; GFX8: buffer_store_dword -; GFX9: global_store_dword -; NOAUTO: s_waitcnt -; AUTO-NOT: s_waitcnt -; GCN: s_barrier define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { +; VARIANT0-LABEL: test_barrier: +; VARIANT0: ; %bb.0: ; %entry +; VARIANT0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; VARIANT0-NEXT: s_load_dword s2, s[0:1], 0xb +; VARIANT0-NEXT: s_mov_b32 s7, 0xf000 +; VARIANT0-NEXT: s_mov_b32 s6, 0 +; VARIANT0-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; 
VARIANT0-NEXT: v_mov_b32_e32 v2, 0 +; VARIANT0-NEXT: s_waitcnt lgkmcnt(0) +; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 +; VARIANT0-NEXT: s_add_i32 s2, s2, -1 +; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0) +; VARIANT0-NEXT: s_barrier +; VARIANT0-NEXT: v_sub_i32_e32 v3, vcc, s2, v0 +; VARIANT0-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; VARIANT0-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 +; VARIANT0-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64 +; VARIANT0-NEXT: s_waitcnt vmcnt(0) +; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 +; VARIANT0-NEXT: s_endpgm +; +; VARIANT1-LABEL: test_barrier: +; VARIANT1: ; %bb.0: ; %entry +; VARIANT1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; VARIANT1-NEXT: s_load_dword s2, s[0:1], 0xb +; VARIANT1-NEXT: s_mov_b32 s7, 0xf000 +; VARIANT1-NEXT: s_mov_b32 s6, 0 +; VARIANT1-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; VARIANT1-NEXT: v_mov_b32_e32 v2, 0 +; VARIANT1-NEXT: s_waitcnt lgkmcnt(0) +; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 +; VARIANT1-NEXT: s_add_i32 s2, s2, -1 +; VARIANT1-NEXT: s_barrier +; VARIANT1-NEXT: v_sub_i32_e32 v3, vcc, s2, v0 +; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; VARIANT1-NEXT: v_lshl_b64 v[3:4], v[3:4], 2 +; VARIANT1-NEXT: s_waitcnt expcnt(0) +; VARIANT1-NEXT: buffer_load_dword v0, v[3:4], s[4:7], 0 addr64 +; VARIANT1-NEXT: s_waitcnt vmcnt(0) +; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 +; VARIANT1-NEXT: s_endpgm +; +; VARIANT2-LABEL: test_barrier: +; VARIANT2: ; %bb.0: ; %entry +; VARIANT2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; VARIANT2-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; VARIANT2-NEXT: s_load_dword s0, s[0:1], 0x2c +; VARIANT2-NEXT: s_waitcnt lgkmcnt(0) +; VARIANT2-NEXT: v_mov_b32_e32 v2, s3 +; VARIANT2-NEXT: v_add_co_u32_e32 v1, vcc, s2, v1 +; VARIANT2-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; VARIANT2-NEXT: global_store_dword v[1:2], v0, off +; VARIANT2-NEXT: s_waitcnt vmcnt(0) +; VARIANT2-NEXT: s_barrier +; VARIANT2-NEXT: 
s_add_i32 s0, s0, -1 +; VARIANT2-NEXT: v_sub_u32_e32 v3, s0, v0 +; VARIANT2-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; VARIANT2-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4] +; VARIANT2-NEXT: v_mov_b32_e32 v0, s3 +; VARIANT2-NEXT: v_add_co_u32_e32 v3, vcc, s2, v3 +; VARIANT2-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v4, vcc +; VARIANT2-NEXT: global_load_dword v0, v[3:4], off +; VARIANT2-NEXT: s_waitcnt vmcnt(0) +; VARIANT2-NEXT: global_store_dword v[1:2], v0, off +; VARIANT2-NEXT: s_endpgm +; +; VARIANT3-LABEL: test_barrier: +; VARIANT3: ; %bb.0: ; %entry +; VARIANT3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; VARIANT3-NEXT: v_lshlrev_b32_e32 v1, 2, v0 +; VARIANT3-NEXT: s_load_dword s0, s[0:1], 0x2c +; VARIANT3-NEXT: s_waitcnt lgkmcnt(0) +; VARIANT3-NEXT: v_mov_b32_e32 v2, s3 +; VARIANT3-NEXT: v_add_co_u32_e32 v1, vcc, s2, v1 +; VARIANT3-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc +; VARIANT3-NEXT: global_store_dword v[1:2], v0, off +; VARIANT3-NEXT: s_barrier +; VARIANT3-NEXT: s_add_i32 s0, s0, -1 +; VARIANT3-NEXT: v_sub_u32_e32 v3, s0, v0 +; VARIANT3-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; VARIANT3-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4] +; VARIANT3-NEXT: v_mov_b32_e32 v0, s3 +; VARIANT3-NEXT: v_add_co_u32_e32 v3, vcc, s2, v3 +; VARIANT3-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v4, vcc +; VARIANT3-NEXT: global_load_dword v0, v[3:4], off +; VARIANT3-NEXT: s_waitcnt vmcnt(0) +; VARIANT3-NEXT: global_store_dword v[1:2], v0, off +; VARIANT3-NEXT: s_endpgm entry: %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp |