diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2017-08-16 17:09:00 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-08-16 17:09:00 +0000 |
| commit | 999b1d942627537be989a53748b61b49a1d59a24 (patch) | |
| tree | fa3ab16955f85f5135b17a8bce01f62d8bac0c34 | |
| parent | 1977092dc3a22b0814314f6a14ba8056462318ee (diff) | |
| download | bcm5719-llvm-999b1d942627537be989a53748b61b49a1d59a24.tar.gz bcm5719-llvm-999b1d942627537be989a53748b61b49a1d59a24.zip | |
amdgcn: rewrite barrier() using fence and clang __builtin_amdgcn_s_barrier
Specs require using fences when barrier() is invoked:
"The barrier function will either flush any variables stored in local memory
or queue a memory fence to ensure correct ordering of memory operations to local memory."
and
"The barrier function will queue a memory fence to ensure correct ordering
of memory operations to global memory."
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 311022
| -rw-r--r-- | libclc/amdgcn/lib/SOURCES | 2 |
| -rw-r--r-- | libclc/amdgcn/lib/synchronization/barrier.cl | 7 |
| -rw-r--r-- | libclc/amdgcn/lib/synchronization/barrier_impl.ll | 32 |
3 files changed, 8 insertions, 33 deletions
diff --git a/libclc/amdgcn/lib/SOURCES b/libclc/amdgcn/lib/SOURCES
index 24f59495cf9..a1f9483f5be 100644
--- a/libclc/amdgcn/lib/SOURCES
+++ b/libclc/amdgcn/lib/SOURCES
@@ -1,7 +1,7 @@
 math/ldexp.cl
 mem_fence/fence.cl
 mem_fence/waitcnt.ll
-synchronization/barrier_impl.ll
+synchronization/barrier.cl
 workitem/get_global_offset.cl
 workitem/get_group_id.cl
 workitem/get_global_size.ll
diff --git a/libclc/amdgcn/lib/synchronization/barrier.cl b/libclc/amdgcn/lib/synchronization/barrier.cl
new file mode 100644
index 00000000000..e2f3c1369bb
--- /dev/null
+++ b/libclc/amdgcn/lib/synchronization/barrier.cl
@@ -0,0 +1,7 @@
+#include <clc/clc.h>
+
+_CLC_DEF void barrier(cl_mem_fence_flags flags)
+{
+  mem_fence(flags);
+  __builtin_amdgcn_s_barrier();
+}
diff --git a/libclc/amdgcn/lib/synchronization/barrier_impl.ll b/libclc/amdgcn/lib/synchronization/barrier_impl.ll
deleted file mode 100644
index 1809eddf695..00000000000
--- a/libclc/amdgcn/lib/synchronization/barrier_impl.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-declare i32 @__clc_clk_local_mem_fence() #1
-declare i32 @__clc_clk_global_mem_fence() #1
-declare void @llvm.amdgcn.s.barrier() #0
-
-define void @barrier(i32 %flags) #2 {
-barrier_local_test:
-  %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
-  %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
-  %1 = icmp ne i32 %0, 0
-  br i1 %1, label %barrier_local, label %barrier_global_test
-
-barrier_local:
-  call void @llvm.amdgcn.s.barrier()
-  br label %barrier_global_test
-
-barrier_global_test:
-  %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
-  %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
-  %3 = icmp ne i32 %2, 0
-  br i1 %3, label %barrier_global, label %done
-
-barrier_global:
-  call void @llvm.amdgcn.s.barrier()
-  br label %done
-
-done:
-  ret void
-}
-
-attributes #0 = { nounwind convergent }
-attributes #1 = { nounwind alwaysinline }
-attributes #2 = { nounwind convergent alwaysinline }

