diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-10-03 19:49:37 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-10-03 19:49:37 +0000 |
commit | ed5bbfdb1b5d3b7d76352c37e187663c1b06b06f (patch) | |
tree | 6dd94e1efe62b8cc59f65859ca4213181d6dfd72 /libclc/generic | |
parent | b5064f79efadaef7822be7427a30a6bd273a9d50 (diff) | |
download | bcm5719-llvm-ed5bbfdb1b5d3b7d76352c37e187663c1b06b06f.tar.gz bcm5719-llvm-ed5bbfdb1b5d3b7d76352c37e187663c1b06b06f.zip |
Implement async_work_group_strided_copy builtin v2
This is a simple implementation which just copies data synchronously.
v2:
- Use size_t.
llvm-svn: 219007
Diffstat (limited to 'libclc/generic')
6 files changed, 66 insertions, 0 deletions
diff --git a/libclc/generic/include/clc/async/async_work_group_strided_copy.h b/libclc/generic/include/clc/async/async_work_group_strided_copy.h
new file mode 100644
index 00000000000..bfa6f31faca
--- /dev/null
+++ b/libclc/generic/include/clc/async/async_work_group_strided_copy.h
@@ -0,0 +1,15 @@
+#define __CLC_DST_ADDR_SPACE local
+#define __CLC_SRC_ADDR_SPACE global
+#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
+
+#define __CLC_DST_ADDR_SPACE global
+#define __CLC_SRC_ADDR_SPACE local
+#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/async/async_work_group_strided_copy.inc b/libclc/generic/include/clc/async/async_work_group_strided_copy.inc
new file mode 100644
index 00000000000..bdbea3aa4a1
--- /dev/null
+++ b/libclc/generic/include/clc/async/async_work_group_strided_copy.inc
@@ -0,0 +1,6 @@
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(
+  __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
+  const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
+  size_t num_gentypes,
+  size_t stride,
+  event_t event);
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
index 53f5b28d4fa..85bf0fa4286 100644
--- a/libclc/generic/include/clc/clc.h
+++ b/libclc/generic/include/clc/clc.h
@@ -137,6 +137,7 @@
 #include <clc/synchronization/barrier.h>
 
 /* 6.11.10 Async Copy and Prefetch Functions */
+#include <clc/async/async_work_group_strided_copy.h>
 #include <clc/async/prefetch.h>
 #include <clc/async/wait_group_events.h>
 
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index 175221110b8..1dd3ff717c3 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -1,3 +1,4 @@
+async/async_work_group_strided_copy.cl
 async/prefetch.cl
 async/wait_group_events.cl
 atomic/atomic_xchg.cl
diff --git a/libclc/generic/lib/async/async_work_group_strided_copy.cl b/libclc/generic/lib/async/async_work_group_strided_copy.cl
new file mode 100644
index 00000000000..61b88986fe4
--- /dev/null
+++ b/libclc/generic/lib/async/async_work_group_strided_copy.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_BODY <async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/lib/async/async_work_group_strided_copy.inc b/libclc/generic/lib/async/async_work_group_strided_copy.inc
new file mode 100644
index 00000000000..d81a8b79430
--- /dev/null
+++ b/libclc/generic/lib/async/async_work_group_strided_copy.inc
@@ -0,0 +1,34 @@
+
+#define STRIDED_COPY(dst, src, num_gentypes, dst_stride, src_stride) \
+  size_t size = get_local_size(0) * get_local_size(1) * get_local_size(2); \
+  size_t id = (get_local_size(1) * get_local_size(2) * get_local_id(0)) + \
+              (get_local_size(2) * get_local_id(1)) + \
+              get_local_id(2); \
+  size_t i; \
+  \
+  for (i = id; i < num_gentypes; i += size) { \
+    dst[i * dst_stride] = src[i * src_stride]; \
+  }
+
+
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
+    local __CLC_GENTYPE *dst,
+    const global __CLC_GENTYPE *src,
+    size_t num_gentypes,
+    size_t src_stride,
+    event_t event) {
+
+  STRIDED_COPY(dst, src, num_gentypes, 1, src_stride);
+  return event;
+}
+
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
+    global __CLC_GENTYPE *dst,
+    const local __CLC_GENTYPE *src,
+    size_t num_gentypes,
+    size_t dst_stride,
+    event_t event) {
+
+  STRIDED_COPY(dst, src, num_gentypes, dst_stride, 1);
+  return event;
+}