summaryrefslogtreecommitdiffstats
path: root/libclc/generic
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2014-10-03 19:49:37 +0000
committerTom Stellard <thomas.stellard@amd.com>2014-10-03 19:49:37 +0000
commited5bbfdb1b5d3b7d76352c37e187663c1b06b06f (patch)
tree6dd94e1efe62b8cc59f65859ca4213181d6dfd72 /libclc/generic
parentb5064f79efadaef7822be7427a30a6bd273a9d50 (diff)
downloadbcm5719-llvm-ed5bbfdb1b5d3b7d76352c37e187663c1b06b06f.tar.gz
bcm5719-llvm-ed5bbfdb1b5d3b7d76352c37e187663c1b06b06f.zip
Implement async_work_group_strided_copy builtin v2
This is a simple implementation which just copies data synchronously.

v2:
- Use size_t.

llvm-svn: 219007
Diffstat (limited to 'libclc/generic')
-rw-r--r--libclc/generic/include/clc/async/async_work_group_strided_copy.h15
-rw-r--r--libclc/generic/include/clc/async/async_work_group_strided_copy.inc6
-rw-r--r--libclc/generic/include/clc/clc.h1
-rw-r--r--libclc/generic/lib/SOURCES1
-rw-r--r--libclc/generic/lib/async/async_work_group_strided_copy.cl9
-rw-r--r--libclc/generic/lib/async/async_work_group_strided_copy.inc34
6 files changed, 66 insertions, 0 deletions
diff --git a/libclc/generic/include/clc/async/async_work_group_strided_copy.h b/libclc/generic/include/clc/async/async_work_group_strided_copy.h
new file mode 100644
index 00000000000..bfa6f31faca
--- /dev/null
+++ b/libclc/generic/include/clc/async/async_work_group_strided_copy.h
@@ -0,0 +1,15 @@
+/*
+ * First pass: destination in local memory, source in global memory.
+ * The body file builds its prototype from __CLC_DST_ADDR_SPACE and
+ * __CLC_SRC_ADDR_SPACE; gentype.inc presumably includes __CLC_BODY once per
+ * supported __CLC_GENTYPE (confirm in clc/async/gentype.inc).
+ */
+#define __CLC_DST_ADDR_SPACE local
+#define __CLC_SRC_ADDR_SPACE global
+#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+/* Drop the helper macros so the next pass can redefine them. */
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
+
+/* Second pass: destination in global memory, source in local memory. */
+#define __CLC_DST_ADDR_SPACE global
+#define __CLC_SRC_ADDR_SPACE local
+#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+/* Leave no helper macros defined for code that includes this header. */
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
diff --git a/libclc/generic/include/clc/async/async_work_group_strided_copy.inc b/libclc/generic/include/clc/async/async_work_group_strided_copy.inc
new file mode 100644
index 00000000000..bdbea3aa4a1
--- /dev/null
+++ b/libclc/generic/include/clc/async/async_work_group_strided_copy.inc
@@ -0,0 +1,6 @@
+/*
+ * Prototype for one overload of async_work_group_strided_copy.
+ *
+ * The including header selects the pointer address spaces through
+ * __CLC_DST_ADDR_SPACE / __CLC_SRC_ADDR_SPACE, and the element type comes
+ * from __CLC_GENTYPE.
+ *
+ *   dst          - destination buffer
+ *   src          - source buffer (read-only)
+ *   num_gentypes - number of elements to copy
+ *   stride       - element stride applied on the global-memory side
+ *   event        - event supplied by the caller
+ *
+ * Returns an event_t.
+ */
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(
+ __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
+ const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ size_t stride,
+ event_t event);
diff --git a/libclc/generic/include/clc/clc.h b/libclc/generic/include/clc/clc.h
index 53f5b28d4fa..85bf0fa4286 100644
--- a/libclc/generic/include/clc/clc.h
+++ b/libclc/generic/include/clc/clc.h
@@ -137,6 +137,7 @@
#include <clc/synchronization/barrier.h>
/* 6.11.10 Async Copy and Prefetch Functions */
+#include <clc/async/async_work_group_strided_copy.h>
#include <clc/async/prefetch.h>
#include <clc/async/wait_group_events.h>
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index 175221110b8..1dd3ff717c3 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -1,3 +1,4 @@
+async/async_work_group_strided_copy.cl
async/prefetch.cl
async/wait_group_events.cl
atomic/atomic_xchg.cl
diff --git a/libclc/generic/lib/async/async_work_group_strided_copy.cl b/libclc/generic/lib/async/async_work_group_strided_copy.cl
new file mode 100644
index 00000000000..61b88986fe4
--- /dev/null
+++ b/libclc/generic/lib/async/async_work_group_strided_copy.cl
@@ -0,0 +1,9 @@
+#include <clc/clc.h>
+
+/*
+ * Enable the cl_khr_fp64 extension when the compiler defines it
+ * (presumably so the double instantiations of the body compile -- confirm
+ * against clc/async/gentype.inc).
+ */
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+/*
+ * Hand the definitions in async_work_group_strided_copy.inc to gentype.inc
+ * through __CLC_BODY, then remove the macro again.
+ */
+#define __CLC_BODY <async_work_group_strided_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_BODY
diff --git a/libclc/generic/lib/async/async_work_group_strided_copy.inc b/libclc/generic/lib/async/async_work_group_strided_copy.inc
new file mode 100644
index 00000000000..d81a8b79430
--- /dev/null
+++ b/libclc/generic/lib/async/async_work_group_strided_copy.inc
@@ -0,0 +1,34 @@
+
+/*
+ * STRIDED_COPY: cooperative element-wise copy shared by both overloads of
+ * async_work_group_strided_copy.
+ *
+ * Each work-item flattens its 3-D local id into one index `id`, then copies
+ * elements id, id + size, id + 2 * size, ... where `size` is the product of
+ * the three local sizes.  Element i is read from src[i * src_stride] and
+ * written to dst[i * dst_stride].
+ *
+ * Every argument is parenthesized and the body is wrapped in
+ * do { } while (0), so the macro accepts expression arguments, expands to a
+ * single statement, and keeps its temporaries out of the caller's scope.
+ * Invoke it with a trailing semicolon.
+ */
+#define STRIDED_COPY(dst, src, num_gentypes, dst_stride, src_stride) \
+  do { \
+    const size_t size = \
+        get_local_size(0) * get_local_size(1) * get_local_size(2); \
+    const size_t id = \
+        (get_local_size(1) * get_local_size(2) * get_local_id(0)) + \
+        (get_local_size(2) * get_local_id(1)) + \
+        get_local_id(2); \
+    for (size_t i = id; i < (num_gentypes); i += size) { \
+      (dst)[i * (dst_stride)] = (src)[i * (src_stride)]; \
+    } \
+  } while (0)
+
+
+/*
+ * Gather overload: global source, local destination.
+ *
+ * Fills consecutive elements of dst, taking element i from
+ * src[i * src_stride], for i in [0, num_gentypes).  The calling work-item
+ * performs its share of the copy inline through STRIDED_COPY before
+ * returning, and the caller's event is handed back unchanged.
+ */
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
+    local __CLC_GENTYPE *dst,
+    const global __CLC_GENTYPE *src,
+    size_t num_gentypes,
+    size_t src_stride,
+    event_t event) {
+  /* The local side is written densely, one element after another. */
+  const size_t packed_stride = 1;
+
+  STRIDED_COPY(dst, src, num_gentypes, packed_stride, src_stride);
+  return event;
+}
+
+/*
+ * Scatter overload: local source, global destination.
+ *
+ * Reads consecutive elements of src and stores element i at
+ * dst[i * dst_stride], for i in [0, num_gentypes).  The calling work-item
+ * performs its share of the copy inline through STRIDED_COPY before
+ * returning, and the caller's event is handed back unchanged.
+ */
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
+    global __CLC_GENTYPE *dst,
+    const local __CLC_GENTYPE *src,
+    size_t num_gentypes,
+    size_t dst_stride,
+    event_t event) {
+  /* The local side is read densely, one element after another. */
+  const size_t packed_stride = 1;
+
+  STRIDED_COPY(dst, src, num_gentypes, dst_stride, packed_stride);
+  return event;
+}
OpenPOWER on IntegriCloud