diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2017-09-04 15:52:05 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-09-04 15:52:05 +0000 |
| commit | e337b30c7d46d66ce80a35af066f0c02cd5a123b (patch) | |
| tree | 593fbe2526f5b83f6c57e9bb297b619795325f22 | |
| parent | 1796d590c156aaa081b316d49c0244360a993c97 (diff) | |
| download | bcm5719-llvm-e337b30c7d46d66ce80a35af066f0c02cd5a123b.tar.gz bcm5719-llvm-e337b30c7d46d66ce80a35af066f0c02cd5a123b.zip | |
r600: Cleanup barrier implementation.
We don't have memory fences for r600, so just call the group barrier directly.
Make sure that the barrier is called even when flags is 0.
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 312492
| -rw-r--r-- | libclc/amdgpu/lib/SOURCES | 1 | ||||
| -rw-r--r-- | libclc/amdgpu/lib/synchronization/barrier.cl | 10 | ||||
| -rw-r--r-- | libclc/r600/lib/synchronization/barrier_impl.ll | 31 |
3 files changed, 5 insertions, 37 deletions
diff --git a/libclc/amdgpu/lib/SOURCES b/libclc/amdgpu/lib/SOURCES index 9379ad7e883..f8175388793 100644 --- a/libclc/amdgpu/lib/SOURCES +++ b/libclc/amdgpu/lib/SOURCES @@ -1,7 +1,6 @@ atomic/atomic.cl math/nextafter.cl math/sqrt.cl -synchronization/barrier.cl image/get_image_width.cl image/get_image_height.cl image/get_image_depth.cl diff --git a/libclc/amdgpu/lib/synchronization/barrier.cl b/libclc/amdgpu/lib/synchronization/barrier.cl deleted file mode 100644 index 6f2900b06ee..00000000000 --- a/libclc/amdgpu/lib/synchronization/barrier.cl +++ /dev/null @@ -1,10 +0,0 @@ - -#include <clc/clc.h> - -_CLC_DEF int __clc_clk_local_mem_fence() { - return CLK_LOCAL_MEM_FENCE; -} - -_CLC_DEF int __clc_clk_global_mem_fence() { - return CLK_GLOBAL_MEM_FENCE; -} diff --git a/libclc/r600/lib/synchronization/barrier_impl.ll b/libclc/r600/lib/synchronization/barrier_impl.ll index 9b8fefb917d..777001a96ac 100644 --- a/libclc/r600/lib/synchronization/barrier_impl.ll +++ b/libclc/r600/lib/synchronization/barrier_impl.ll @@ -1,32 +1,11 @@ -declare i32 @__clc_clk_local_mem_fence() #1 -declare i32 @__clc_clk_global_mem_fence() #1 declare void @llvm.r600.group.barrier() #0 -define void @barrier(i32 %flags) #2 { -barrier_local_test: - %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence() - %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE - %1 = icmp ne i32 %0, 0 - br i1 %1, label %barrier_local, label %barrier_global_test - -barrier_local: - call void @llvm.r600.group.barrier() - br label %barrier_global_test - -barrier_global_test: - %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence() - %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE - %3 = icmp ne i32 %2, 0 - br i1 %3, label %barrier_global, label %done - -barrier_global: - call void @llvm.r600.group.barrier() - br label %done - -done: +define void @barrier(i32 %flags) #1 { +entry: + ; We should call mem_fence here, but that is not implemented for r600 yet + tail call void @llvm.r600.group.barrier() ret void } attributes #0 = 
{ nounwind convergent } -attributes #1 = { nounwind alwaysinline } -attributes #2 = { nounwind convergent alwaysinline } +attributes #1 = { nounwind convergent alwaysinline } |

