[AMDGPU] Optimize atomic max/min

Summary: Extend the atomic optimizer to handle signed and unsigned max and min operations, in addition to add and subtract.
Reviewers: arsenm, sheredom, critson, rampitec
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64328
llvm-svn: 366235
author: Jay Foad <jay.foad@gmail.com> 2019-07-16 17:44:54 +0000
committer: Jay Foad <jay.foad@gmail.com> 2019-07-16 17:44:54 +0000
commit: 17060f0a54b681b8c7cec2f9ab465f6a1e51d968 (patch)
tree: 0429d0886731a65d1847597e383301ef78c4d14d /llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
parent: c6fd5abecc42fb56525cb56b62f95603863ae6a0 (diff)
download: bcm5719-llvm-17060f0a54b681b8c7cec2f9ab465f6a1e51d968.tar.gz
bcm5719-llvm-17060f0a54b681b8c7cec2f9ab465f6a1e51d968.zip
1 file changed, 108 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index f3d50c9c490..5f7649c1c0e 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -194,3 +194,111 @@ entry:
   store i64 %old, i64 addrspace(1)* %out
   ret void
 }
+
+; GCN-LABEL: max_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @max_i32_varying(i32 addrspace(1)* %out) { ; test kernel: signed 32-bit atomic max whose operand comes from workitem.id.x
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x() ; operand for the atomic (the "varying" value of the test name)
+  %old = atomicrmw max i32 addrspace(3)* @local_var32, i32 %lane acq_rel ; signed max on @local_var32 in addrspace(3), acq_rel ordering; %old is the value the atomicrmw returns
+  store i32 %old, i32 addrspace(1)* %out ; store the returned value to %out so the result is used (the checks expect ds_max_rtn_i32)
+  ret void
+}
+
+; GCN-LABEL: max_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_max_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @max_i64_constant(i64 addrspace(1)* %out) { ; test kernel: signed 64-bit atomic max with the constant operand 5
+entry:
+  %old = atomicrmw max i64 addrspace(3)* @local_var64, i64 5 acq_rel ; signed max of @local_var64 (addrspace 3) with 5; the checks expect 5 as a lo=5 / hi=0 v_mov_b32 pair
+  store i64 %old, i64 addrspace(1)* %out ; store the returned value to %out so the result is used (the checks expect ds_max_rtn_i64)
+  ret void
+}
+
+; GCN-LABEL: min_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @min_i32_varying(i32 addrspace(1)* %out) { ; test kernel: signed 32-bit atomic min whose operand comes from workitem.id.x
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x() ; operand for the atomic (the "varying" value of the test name)
+  %old = atomicrmw min i32 addrspace(3)* @local_var32, i32 %lane acq_rel ; signed min on @local_var32 in addrspace(3), acq_rel ordering; %old is the value the atomicrmw returns
+  store i32 %old, i32 addrspace(1)* %out ; store the returned value to %out so the result is used (the checks expect ds_min_rtn_i32)
+  ret void
+}
+
+; GCN-LABEL: min_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_min_rtn_i64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @min_i64_constant(i64 addrspace(1)* %out) { ; test kernel: signed 64-bit atomic min with the constant operand 5
+entry:
+  %old = atomicrmw min i64 addrspace(3)* @local_var64, i64 5 acq_rel ; signed min of @local_var64 (addrspace 3) with 5; the checks expect 5 as a lo=5 / hi=0 v_mov_b32 pair
+  store i64 %old, i64 addrspace(1)* %out ; store the returned value to %out so the result is used (the checks expect ds_min_rtn_i64)
+  ret void
+}
+
+; GCN-LABEL: umax_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @umax_i32_varying(i32 addrspace(1)* %out) { ; test kernel: unsigned 32-bit atomic max whose operand comes from workitem.id.x
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x() ; operand for the atomic (the "varying" value of the test name)
+  %old = atomicrmw umax i32 addrspace(3)* @local_var32, i32 %lane acq_rel ; unsigned max on @local_var32 in addrspace(3), acq_rel ordering; %old is the value the atomicrmw returns
+  store i32 %old, i32 addrspace(1)* %out ; store the returned value to %out so the result is used (the checks expect ds_max_rtn_u32)
+  ret void
+}
+
+; GCN-LABEL: umax_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_max_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @umax_i64_constant(i64 addrspace(1)* %out) { ; test kernel: unsigned 64-bit atomic max with the constant operand 5
+entry:
+  %old = atomicrmw umax i64 addrspace(3)* @local_var64, i64 5 acq_rel ; unsigned max of @local_var64 (addrspace 3) with 5; the checks expect 5 as a lo=5 / hi=0 v_mov_b32 pair
+  store i64 %old, i64 addrspace(1)* %out ; store the returned value to %out so the result is used (the checks expect ds_max_rtn_u64)
+  ret void
+}
+
+; GCN-LABEL: umin_i32_varying:
+; GFX8MORE: v_readlane_b32 s[[scalar_value:[0-9]+]], v{{[0-9]+}}, 63
+; GFX8MORE: v_mov_b32{{(_e[0-9]+)?}} v[[value:[0-9]+]], s[[scalar_value]]
+; GFX8MORE: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v[[value]]
+define amdgpu_kernel void @umin_i32_varying(i32 addrspace(1)* %out) { ; test kernel: unsigned 32-bit atomic min whose operand comes from workitem.id.x
+entry:
+  %lane = call i32 @llvm.amdgcn.workitem.id.x() ; operand for the atomic (the "varying" value of the test name)
+  %old = atomicrmw umin i32 addrspace(3)* @local_var32, i32 %lane acq_rel ; unsigned min on @local_var32 in addrspace(3), acq_rel ordering; %old is the value the atomicrmw returns
+  store i32 %old, i32 addrspace(1)* %out ; store the returned value to %out so the result is used (the checks expect ds_min_rtn_u32)
+  ret void
+}
+
+; GCN-LABEL: umin_i64_constant:
+; GCN: v_cmp_ne_u32_e64 s{{\[}}[[exec_lo:[0-9]+]]:[[exec_hi:[0-9]+]]{{\]}}, 1, 0
+; GCN: v_mbcnt_lo_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_lo:[0-9]+]], s[[exec_lo]], 0
+; GCN: v_mbcnt_hi_u32_b32{{(_e[0-9]+)?}} v[[mbcnt_hi:[0-9]+]], s[[exec_hi]], v[[mbcnt_lo]]
+; GCN: v_cmp_eq_u32{{(_e[0-9]+)?}} vcc, 0, v[[mbcnt_hi]]
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_lo:[0-9]+]], 5
+; GCN: v_mov_b32{{(_e[0-9]+)?}} v[[value_hi:[0-9]+]], 0
+; GCN: ds_min_rtn_u64 v{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v{{[0-9]+}}, v{{\[}}[[value_lo]]:[[value_hi]]{{\]}}
+define amdgpu_kernel void @umin_i64_constant(i64 addrspace(1)* %out) { ; test kernel: unsigned 64-bit atomic min with the constant operand 5
+entry:
+  %old = atomicrmw umin i64 addrspace(3)* @local_var64, i64 5 acq_rel ; unsigned min of @local_var64 (addrspace 3) with 5; the checks expect 5 as a lo=5 / hi=0 v_mov_b32 pair
+  store i64 %old, i64 addrspace(1)* %out ; store the returned value to %out so the result is used (the checks expect ds_min_rtn_u64)
+  ret void
+}
author	Jay Foad <jay.foad@gmail.com>	2019-07-16 17:44:54 +0000
committer	Jay Foad <jay.foad@gmail.com>	2019-07-16 17:44:54 +0000
commit	17060f0a54b681b8c7cec2f9ab465f6a1e51d968 (patch)
tree	0429d0886731a65d1847597e383301ef78c4d14d /llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
parent	c6fd5abecc42fb56525cb56b62f95603863ae6a0 (diff)
download	bcm5719-llvm-17060f0a54b681b8c7cec2f9ab465f6a1e51d968.tar.gz bcm5719-llvm-17060f0a54b681b8c7cec2f9ab465f6a1e51d968.zip