diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2017-08-16 17:08:56 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-08-16 17:08:56 +0000 |
| commit | 1977092dc3a22b0814314f6a14ba8056462318ee (patch) | |
| tree | 3c40d21ec294af5567bbb2665ea8f893dfa437b6 | |
| parent | fec506daaa4f2c4d0d3b449fbf3901bfd6270b70 (diff) | |
| download | bcm5719-llvm-1977092dc3a22b0814314f6a14ba8056462318ee.tar.gz bcm5719-llvm-1977092dc3a22b0814314f6a14ba8056462318ee.zip | |
amdgcn: Implement {read_,write_,}mem_fence builtin
v2: add more detailed comment about waitcnt instruction
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
Reviewed-by: Aaron Watry <awatry@gmail.com>
Tested-by: Aaron Watry <awatry@gmail.com>
llvm-svn: 311021
| -rw-r--r-- | libclc/amdgcn/lib/SOURCES | 2 | ||||
| -rw-r--r-- | libclc/amdgcn/lib/mem_fence/fence.cl | 39 | ||||
| -rw-r--r-- | libclc/amdgcn/lib/mem_fence/waitcnt.ll | 11 | ||||
| -rw-r--r-- | libclc/generic/include/clc/clc.h | 3 | ||||
| -rw-r--r-- | libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h | 3 |
5 files changed, 58 insertions, 0 deletions
```diff
diff --git a/libclc/amdgcn/lib/SOURCES b/libclc/amdgcn/lib/SOURCES
index 1ff5fd12b3b..24f59495cf9 100644
--- a/libclc/amdgcn/lib/SOURCES
+++ b/libclc/amdgcn/lib/SOURCES
@@ -1,4 +1,6 @@
 math/ldexp.cl
+mem_fence/fence.cl
+mem_fence/waitcnt.ll
 synchronization/barrier_impl.ll
 workitem/get_global_offset.cl
 workitem/get_group_id.cl
diff --git a/libclc/amdgcn/lib/mem_fence/fence.cl b/libclc/amdgcn/lib/mem_fence/fence.cl
new file mode 100644
index 00000000000..408ffc305a3
--- /dev/null
+++ b/libclc/amdgcn/lib/mem_fence/fence.cl
@@ -0,0 +1,39 @@
+#include <clc/clc.h>
+
+void __clc_amdgcn_s_waitcnt(unsigned flags);
+
+// s_waitcnt takes 16bit argument with a combined number of maximum allowed
+// pending operations:
+// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
+// [7] -- undefined
+// [6:4] -- exports, GDS, and mem write
+// [3:0] -- vector memory operations
+
+// Newer clang supports __builtin_amdgcn_s_waitcnt
+#if __clang_major__ >= 5
+# define __waitcnt(x) __builtin_amdgcn_s_waitcnt(x)
+#else
+# define __waitcnt(x) __clc_amdgcn_s_waitcnt(x)
+#endif
+
+_CLC_DEF void mem_fence(cl_mem_fence_flags flags)
+{
+	if (flags & CLK_GLOBAL_MEM_FENCE) {
+		// scalar loads are counted with LGKM but we don't know whether
+		// the compiler turned any loads to scalar
+		__waitcnt(0);
+	} else if (flags & CLK_LOCAL_MEM_FENCE)
+		__waitcnt(0xff); // LGKM is [12:8]
+}
+#undef __waitcnt
+
+// We don't have separate mechanism for read and write fences
+_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags)
+{
+	mem_fence(flags);
+}
+
+_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags)
+{
+	mem_fence(flags);
+}
diff --git a/libclc/amdgcn/lib/mem_fence/waitcnt.ll b/libclc/amdgcn/lib/mem_fence/waitcnt.ll
new file mode 100644
index 00000000000..8be7f18452b
--- /dev/null
+++ b/libclc/amdgcn/lib/mem_fence/waitcnt.ll
@@ -0,0 +1,11 @@
+declare void @llvm.amdgcn.s.waitcnt(i32) #0
+
+; Export waitcnt intrinsic for clang < 5
+define void @__clc_amdgcn_s_waitcnt(i32 %flags) #1 {
+entry:
+  tail call void @llvm.amdgcn.s.waitcnt(i32 %flags)
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind alwaysinline }
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
index 5130632be46..deb9d70f784 100644
--- a/libclc/generic/include/clc/clc.h
+++ b/libclc/generic/include/clc/clc.h
@@ -179,6 +179,9 @@
 #include <clc/synchronization/cl_mem_fence_flags.h>
 #include <clc/synchronization/barrier.h>
 
+/* 6.11.9 Explicit Memory Fence Functions */
+#include <clc/explicit_fence/explicit_memory_fence.h>
+
 /* 6.11.10 Async Copy and Prefetch Functions */
 #include <clc/async/async_work_group_copy.h>
 #include <clc/async/async_work_group_strided_copy.h>
diff --git a/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
new file mode 100644
index 00000000000..8e046b1225d
--- /dev/null
+++ b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
@@ -0,0 +1,3 @@
+_CLC_DECL void mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags);
```

