summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jan Vesely <jan.vesely@rutgers.edu> 2017-08-16 17:09:00 +0000
committer: Jan Vesely <jan.vesely@rutgers.edu> 2017-08-16 17:09:00 +0000
commit: 999b1d942627537be989a53748b61b49a1d59a24 (patch)
tree: fa3ab16955f85f5135b17a8bce01f62d8bac0c34
parent: 1977092dc3a22b0814314f6a14ba8056462318ee (diff)
downloadbcm5719-llvm-999b1d942627537be989a53748b61b49a1d59a24.tar.gz
bcm5719-llvm-999b1d942627537be989a53748b61b49a1d59a24.zip
amdgcn: rewrite barrier() using fence and clang __builtin_amdgcn_s_barrier
Specs require using fences when barrier() is invoked:
"The barrier function will either flush any variables stored in local memory or queue a memory fence to ensure correct ordering of memory operations to local memory."
and
"The barrier function will queue a memory fence to ensure correct ordering of memory operations to global memory."

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 311022
-rw-r--r-- libclc/amdgcn/lib/SOURCES | 2
-rw-r--r-- libclc/amdgcn/lib/synchronization/barrier.cl | 7
-rw-r--r-- libclc/amdgcn/lib/synchronization/barrier_impl.ll | 32
3 files changed, 8 insertions, 33 deletions
diff --git a/libclc/amdgcn/lib/SOURCES b/libclc/amdgcn/lib/SOURCES
index 24f59495cf9..a1f9483f5be 100644
--- a/libclc/amdgcn/lib/SOURCES
+++ b/libclc/amdgcn/lib/SOURCES
@@ -1,7 +1,7 @@
math/ldexp.cl
mem_fence/fence.cl
mem_fence/waitcnt.ll
-synchronization/barrier_impl.ll
+synchronization/barrier.cl
workitem/get_global_offset.cl
workitem/get_group_id.cl
workitem/get_global_size.ll
diff --git a/libclc/amdgcn/lib/synchronization/barrier.cl b/libclc/amdgcn/lib/synchronization/barrier.cl
new file mode 100644
index 00000000000..e2f3c1369bb
--- /dev/null
+++ b/libclc/amdgcn/lib/synchronization/barrier.cl
@@ -0,0 +1,7 @@
+#include <clc/clc.h>
+
+_CLC_DEF void barrier(cl_mem_fence_flags flags)
+{
+ mem_fence(flags);
+ __builtin_amdgcn_s_barrier();
+}
diff --git a/libclc/amdgcn/lib/synchronization/barrier_impl.ll b/libclc/amdgcn/lib/synchronization/barrier_impl.ll
deleted file mode 100644
index 1809eddf695..00000000000
--- a/libclc/amdgcn/lib/synchronization/barrier_impl.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-declare i32 @__clc_clk_local_mem_fence() #1
-declare i32 @__clc_clk_global_mem_fence() #1
-declare void @llvm.amdgcn.s.barrier() #0
-
-define void @barrier(i32 %flags) #2 {
-barrier_local_test:
- %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
- %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
- %1 = icmp ne i32 %0, 0
- br i1 %1, label %barrier_local, label %barrier_global_test
-
-barrier_local:
- call void @llvm.amdgcn.s.barrier()
- br label %barrier_global_test
-
-barrier_global_test:
- %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
- %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
- %3 = icmp ne i32 %2, 0
- br i1 %3, label %barrier_global, label %done
-
-barrier_global:
- call void @llvm.amdgcn.s.barrier()
- br label %done
-
-done:
- ret void
-}
-
-attributes #0 = { nounwind convergent }
-attributes #1 = { nounwind alwaysinline }
-attributes #2 = { nounwind convergent alwaysinline }
OpenPOWER on IntegriCloud