diff options
| author | Jay Foad <jay.foad@gmail.com> | 2019-07-16 17:44:54 +0000 |
|---|---|---|
| committer | Jay Foad <jay.foad@gmail.com> | 2019-07-16 17:44:54 +0000 |
| commit | 17060f0a54b681b8c7cec2f9ab465f6a1e51d968 (patch) | |
| tree | 0429d0886731a65d1847597e383301ef78c4d14d /llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll | |
| parent | c6fd5abecc42fb56525cb56b62f95603863ae6a0 (diff) | |
| download | bcm5719-llvm-17060f0a54b681b8c7cec2f9ab465f6a1e51d968.tar.gz bcm5719-llvm-17060f0a54b681b8c7cec2f9ab465f6a1e51d968.zip | |
[AMDGPU] Optimize atomic max/min
Summary:
Extend the atomic optimizer to handle signed and unsigned max and min
operations, as well as add and subtract.
Reviewers: arsenm, sheredom, critson, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64328
llvm-svn: 366235
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index f3d50c9c490..5f7649c1c0e 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -194,3 +194,111 @@ entry:
   store i64 %old, i64 addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: max_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw max i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: max_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_max_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) {
+entry:
+  %old = atomicrmw max i64 addrspace(3)* @local_var64, i64 5 acq_rel
+  store i64 %old, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: min_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw min i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: min_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_min_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) {
+entry:
+  %old = atomicrmw min i64 addrspace(3)* @local_var64, i64 5 acq_rel
+  store i64 %old, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: umax_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw umax i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: umax_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_max_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) {
+entry:
+  %old = atomicrmw umax i64 addrspace(3)* @local_var64, i64 5 acq_rel
+  store i64 %old, i64 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: umin_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) {
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw umin i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  store i32 %old, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: umin_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_min_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) {
+entry:
+  %old = atomicrmw umin i64 addrspace(3)* @local_var64, i64 5 acq_rel
+  store i64 %old, i64 addrspace(1)* %out
+  ret void
+}

