summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJan Vesely <jan.vesely@rutgers.edu>2017-08-16 17:08:56 +0000
committerJan Vesely <jan.vesely@rutgers.edu>2017-08-16 17:08:56 +0000
commit1977092dc3a22b0814314f6a14ba8056462318ee (patch)
tree3c40d21ec294af5567bbb2665ea8f893dfa437b6
parentfec506daaa4f2c4d0d3b449fbf3901bfd6270b70 (diff)
downloadbcm5719-llvm-1977092dc3a22b0814314f6a14ba8056462318ee.tar.gz
bcm5719-llvm-1977092dc3a22b0814314f6a14ba8056462318ee.zip
amdgcn: Implement {read_,write_,}mem_fence builtin
v2: add more detailed comment about waitcnt instruction Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu> Reviewed-by: Aaron Watry <awatry@gmail.com> Tested-by: Aaron Watry <awatry@gmail.com> llvm-svn: 311021
-rw-r--r--libclc/amdgcn/lib/SOURCES2
-rw-r--r--libclc/amdgcn/lib/mem_fence/fence.cl39
-rw-r--r--libclc/amdgcn/lib/mem_fence/waitcnt.ll11
-rw-r--r--libclc/generic/include/clc/clc.h3
-rw-r--r--libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h3
5 files changed, 58 insertions, 0 deletions
diff --git a/libclc/amdgcn/lib/SOURCES b/libclc/amdgcn/lib/SOURCES
index 1ff5fd12b3b..24f59495cf9 100644
--- a/libclc/amdgcn/lib/SOURCES
+++ b/libclc/amdgcn/lib/SOURCES
@@ -1,4 +1,6 @@
math/ldexp.cl
+mem_fence/fence.cl
+mem_fence/waitcnt.ll
synchronization/barrier_impl.ll
workitem/get_global_offset.cl
workitem/get_group_id.cl
diff --git a/libclc/amdgcn/lib/mem_fence/fence.cl b/libclc/amdgcn/lib/mem_fence/fence.cl
new file mode 100644
index 00000000000..408ffc305a3
--- /dev/null
+++ b/libclc/amdgcn/lib/mem_fence/fence.cl
@@ -0,0 +1,39 @@
+#include <clc/clc.h>
+
+// Out-of-line wrapper around the s_waitcnt intrinsic, defined in waitcnt.ll.
+// It is only referenced when the clang builtin below is not available.
+void __clc_amdgcn_s_waitcnt(unsigned flags);
+
+// s_waitcnt takes a 16-bit argument that packs, for each counter, the maximum
+// number of operations that are allowed to remain pending:
+// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
+// [7] -- undefined
+// [6:4] -- exports, GDS, and mem write
+// [3:0] -- vector memory operations
+// A field set to 0 therefore means "wait until nothing of that kind is pending".
+
+// Newer clang supports __builtin_amdgcn_s_waitcnt; it is used for
+// __clang_major__ >= 5, and older compilers fall back to the wrapper
+// declared above.
+#if __clang_major__ >= 5
+# define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
+#else
+# define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
+#endif
+
+// Memory fence implemented with s_waitcnt.
+// flags: bit mask tested against CLK_GLOBAL_MEM_FENCE and CLK_LOCAL_MEM_FENCE.
+// The global bit is checked first; if neither bit is set no wait is issued.
+_CLC_DEF void mem_fence(cl_mem_fence_flags flags)
+{
+  if (flags & CLK_GLOBAL_MEM_FENCE) {
+    // Scalar loads are accounted under LGKM, and we cannot tell whether the
+    // compiler turned any loads into scalar ones, so wait on every counter.
+    __waitcnt(0);
+    return;
+  }
+
+  if (flags & CLK_LOCAL_MEM_FENCE) {
+    // 0xff sets bits [7:0] and leaves the LGKM field [12:8] at zero.
+    __waitcnt(0xff);
+  }
+}
+#undef __waitcnt
+
+// There is no separate mechanism for read and write fences here, so
+// read_mem_fence simply forwards its flags to mem_fence.
+_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags)
+{
+ mem_fence(flags);
+}
+
+// write_mem_fence forwards its flags to mem_fence unchanged; no write-specific
+// handling is performed in this function.
+_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags)
+{
+ mem_fence(flags);
+}
diff --git a/libclc/amdgcn/lib/mem_fence/waitcnt.ll b/libclc/amdgcn/lib/mem_fence/waitcnt.ll
new file mode 100644
index 00000000000..8be7f18452b
--- /dev/null
+++ b/libclc/amdgcn/lib/mem_fence/waitcnt.ll
@@ -0,0 +1,11 @@
+; Declaration of the s_waitcnt intrinsic; it takes a single i32 operand.
+declare void @llvm.amdgcn.s.waitcnt(i32) #0
+
+; Export waitcnt intrinsic for clang < 5
+; fence.cl declares this symbol as void __clc_amdgcn_s_waitcnt(unsigned) and
+; calls it when __builtin_amdgcn_s_waitcnt is not used.
+define void @__clc_amdgcn_s_waitcnt(i32 %flags) #1 {
+entry:
+ ; Pass %flags through to the intrinsic unchanged.
+ tail call void @llvm.amdgcn.s.waitcnt(i32 %flags)
+ ret void
+}
+
+; #0 is applied to the intrinsic declaration.
+attributes #0 = { nounwind }
+; #1 is applied to the wrapper; alwaysinline asks for it to be inlined into
+; its callers.
+; NOTE(review): presumably needed so the intrinsic sees the caller's constant
+; operand -- confirm.
+attributes #1 = { nounwind alwaysinline }
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
index 5130632be46..deb9d70f784 100644
--- a/libclc/generic/include/clc/clc.h
+++ b/libclc/generic/include/clc/clc.h
@@ -179,6 +179,9 @@
#include <clc/synchronization/cl_mem_fence_flags.h>
#include <clc/synchronization/barrier.h>
+/* 6.11.9 Explicit Memory Fence Functions */
+#include <clc/explicit_fence/explicit_memory_fence.h>
+
/* 6.11.10 Async Copy and Prefetch Functions */
#include <clc/async/async_work_group_copy.h>
#include <clc/async/async_work_group_strided_copy.h>
diff --git a/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
new file mode 100644
index 00000000000..8e046b1225d
--- /dev/null
+++ b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
@@ -0,0 +1,3 @@
+/* Declarations of the explicit memory fence functions (included from clc.h
+ * under "6.11.9 Explicit Memory Fence Functions"). All three take the same
+ * cl_mem_fence_flags argument and return nothing. */
+_CLC_DECL void mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags);
OpenPOWER on IntegriCloud