diff options
-rw-r--r-- | libclc/amdgpu/lib/SOURCES | 1 | ||||
-rw-r--r-- | libclc/amdgpu/lib/atomic/atomic.cl | 65 | ||||
-rw-r--r-- | libclc/generic/lib/SOURCES | 9 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_add.cl | 12 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_and.cl | 12 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_cmpxchg.cl | 12 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_impl.ll | 133 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_max.cl | 12 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_min.cl | 12 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_or.cl | 12 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_sub.cl | 12 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_xchg.cl | 15 | ||||
-rw-r--r-- | libclc/generic/lib/atomic/atomic_xor.cl | 12 |
13 files changed, 117 insertions, 202 deletions
diff --git a/libclc/amdgpu/lib/SOURCES b/libclc/amdgpu/lib/SOURCES index f8175388793..44146212744 100644 --- a/libclc/amdgpu/lib/SOURCES +++ b/libclc/amdgpu/lib/SOURCES @@ -1,4 +1,3 @@ -atomic/atomic.cl math/nextafter.cl math/sqrt.cl image/get_image_width.cl diff --git a/libclc/amdgpu/lib/atomic/atomic.cl b/libclc/amdgpu/lib/atomic/atomic.cl deleted file mode 100644 index 5bfe07b94bf..00000000000 --- a/libclc/amdgpu/lib/atomic/atomic.cl +++ /dev/null @@ -1,65 +0,0 @@ -#include <clc/clc.h> - -#define ATOMIC_FUNC_DEFINE(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ -_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE val) { \ - return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)val); \ -} - -/* For atomic functions that don't need different bitcode dependending on argument signedness */ -#define ATOMIC_FUNC_SIGN(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE); \ - ATOMIC_FUNC_DEFINE(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - ATOMIC_FUNC_DEFINE(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) - -#define ATOMIC_FUNC_ADDRSPACE(TYPE, FUNCTION) \ - ATOMIC_FUNC_SIGN(TYPE, FUNCTION, global, 1) \ - ATOMIC_FUNC_SIGN(TYPE, FUNCTION, local, 3) - -#define ATOMIC_FUNC(FUNCTION) \ - ATOMIC_FUNC_ADDRSPACE(int, FUNCTION) - -#define ATOMIC_FUNC_DEFINE_3_ARG(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ -_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE cmp, RET_SIGN TYPE val) { \ - return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)cmp, (ARG_SIGN TYPE)val); \ -} - -/* For atomic functions that don't need different bitcode dependending on argument signedness */ -#define ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - _CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE, signed TYPE); \ - ATOMIC_FUNC_DEFINE_3_ARG(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \ - ATOMIC_FUNC_DEFINE_3_ARG(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) - -#define ATOMIC_FUNC_ADDRSPACE_3_ARG(TYPE, FUNCTION) \ - ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, global, 1) \ - ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, local, 3) - -#define ATOMIC_FUNC_3_ARG(FUNCTION) \ - ATOMIC_FUNC_ADDRSPACE_3_ARG(int, FUNCTION) - -ATOMIC_FUNC(atomic_add) -ATOMIC_FUNC(atomic_and) -ATOMIC_FUNC(atomic_or) -ATOMIC_FUNC(atomic_sub) -ATOMIC_FUNC(atomic_xchg) -ATOMIC_FUNC(atomic_xor) -ATOMIC_FUNC_3_ARG(atomic_cmpxchg) - -_CLC_DECL signed int __clc_atomic_max_addr1(volatile global signed int*, signed int); -_CLC_DECL signed int __clc_atomic_max_addr3(volatile local signed int*, signed int); -_CLC_DECL uint __clc_atomic_umax_addr1(volatile global uint*, uint); -_CLC_DECL uint __clc_atomic_umax_addr3(volatile local uint*, uint); - -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, global, 1) -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, local, 3) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, global, 1) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, local, 3) - -_CLC_DECL signed int __clc_atomic_min_addr1(volatile global signed int*, signed int); -_CLC_DECL signed int __clc_atomic_min_addr3(volatile local signed int*, signed int); -_CLC_DECL uint __clc_atomic_umin_addr1(volatile global uint*, uint); -_CLC_DECL uint __clc_atomic_umin_addr3(volatile local uint*, uint); - -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, global, 1) -ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, local, 3) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, global, 1) -ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, local, 3) diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index 23905c46ee7..f919bc788d8 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -4,8 +4,15 @@ async/async_work_group_copy.cl async/async_work_group_strided_copy.cl async/prefetch.cl async/wait_group_events.cl +atomic/atomic_add.cl +atomic/atomic_and.cl +atomic/atomic_cmpxchg.cl +atomic/atomic_max.cl +atomic/atomic_min.cl +atomic/atomic_or.cl +atomic/atomic_sub.cl +atomic/atomic_xor.cl atomic/atomic_xchg.cl -atomic/atomic_impl.ll cl_khr_global_int32_base_atomics/atom_add.cl cl_khr_global_int32_base_atomics/atom_cmpxchg.cl cl_khr_global_int32_base_atomics/atom_dec.cl diff --git a/libclc/generic/lib/atomic/atomic_add.cl b/libclc/generic/lib/atomic/atomic_add.cl new file mode 100644 index 00000000000..f7d81f2dbab --- /dev/null +++ b/libclc/generic/lib/atomic/atomic_add.cl @@ -0,0 +1,12 @@ +#include <clc/clc.h> + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_add(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_and.cl b/libclc/generic/lib/atomic/atomic_and.cl new file mode 100644 index 00000000000..556d22ad45f --- /dev/null +++ b/libclc/generic/lib/atomic/atomic_and.cl @@ -0,0 +1,12 @@ +#include <clc/clc.h> + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_and(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_cmpxchg.cl b/libclc/generic/lib/atomic/atomic_cmpxchg.cl new file mode 100644 index 00000000000..fcf2e0cafdb --- /dev/null +++ b/libclc/generic/lib/atomic/atomic_cmpxchg.cl @@ -0,0 +1,12 @@ +#include <clc/clc.h> + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, TYPE val) { \ + return __sync_val_compare_and_swap(p, cmp, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_impl.ll b/libclc/generic/lib/atomic/atomic_impl.ll deleted file mode 100644 index 019147f8c50..00000000000 --- a/libclc/generic/lib/atomic/atomic_impl.ll +++ /dev/null @@ -1,133 +0,0 @@ -define i32 @__clc_atomic_add_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_add_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile add i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_and_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_and_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile and i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_cmpxchg_addr1(i32 addrspace(1)* nocapture %ptr, i32 %compare, i32 %value) nounwind alwaysinline { -entry: - %0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %compare, i32 %value seq_cst seq_cst - %1 = extractvalue { i32, i1 } %0, 0 - ret i32 %1 -} - -define i32 @__clc_atomic_cmpxchg_addr3(i32 addrspace(3)* nocapture %ptr, i32 %compare, i32 %value) nounwind alwaysinline { -entry: - %0 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 %compare, i32 %value seq_cst seq_cst - %1 = extractvalue { i32, i1 } %0, 0 - ret i32 %1 -} - -define i32 @__clc_atomic_max_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_max_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile max i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_min_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_min_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile min i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_or_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_or_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile or i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_umax_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_umax_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile umax i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_umin_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_umin_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile umin i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_sub_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_sub_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile sub i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_xchg_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_xchg_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile xchg i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_xor_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %value seq_cst - ret i32 %0 -} - -define i32 @__clc_atomic_xor_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline { -entry: - %0 = atomicrmw volatile xor i32 addrspace(3)* %ptr, i32 %value seq_cst - ret i32 %0 -} diff --git a/libclc/generic/lib/atomic/atomic_max.cl b/libclc/generic/lib/atomic/atomic_max.cl new file mode 100644 index 00000000000..afd86c2fe20 --- /dev/null +++ b/libclc/generic/lib/atomic/atomic_max.cl @@ -0,0 +1,12 @@ +#include <clc/clc.h> + +#define IMPL(TYPE, AS, OP) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_##OP(p, val); \ +} + +IMPL(int, global, max) +IMPL(unsigned int, global, umax) +IMPL(int, local, max) +IMPL(unsigned int, local, umax) +#undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_min.cl b/libclc/generic/lib/atomic/atomic_min.cl new file mode 100644 index 00000000000..a6099d54577 --- /dev/null +++ b/libclc/generic/lib/atomic/atomic_min.cl @@ -0,0 +1,12 @@ +#include <clc/clc.h> + +#define IMPL(TYPE, AS, OP) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_##OP(p, val); \ +} + +IMPL(int, global, min) +IMPL(unsigned int, global, umin) +IMPL(int, local, min) +IMPL(unsigned int, local, umin) +#undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_or.cl b/libclc/generic/lib/atomic/atomic_or.cl new file mode 100644 index 00000000000..75ef51db039 --- /dev/null +++ b/libclc/generic/lib/atomic/atomic_or.cl @@ -0,0 +1,12 @@ +#include <clc/clc.h> + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_or(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_sub.cl b/libclc/generic/lib/atomic/atomic_sub.cl new file mode 100644 index 00000000000..49098ffddd3 --- /dev/null +++ b/libclc/generic/lib/atomic/atomic_sub.cl @@ -0,0 +1,12 @@ +#include <clc/clc.h> + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_sub(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_xchg.cl b/libclc/generic/lib/atomic/atomic_xchg.cl index 9aee5950141..9c4e40480b3 100644 --- a/libclc/generic/lib/atomic/atomic_xchg.cl +++ b/libclc/generic/lib/atomic/atomic_xchg.cl @@ -1,9 +1,20 @@ #include <clc/clc.h> _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) { - return as_float(atomic_xchg((volatile global int *)p, as_int(val))); + return as_float(atomic_xchg((volatile global uint *)p, as_uint(val))); } _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) { - return as_float(atomic_xchg((volatile local int *)p, as_int(val))); + return as_float(atomic_xchg((volatile local uint *)p, as_uint(val))); } + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \ + return __sync_swap_4(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff --git a/libclc/generic/lib/atomic/atomic_xor.cl b/libclc/generic/lib/atomic/atomic_xor.cl new file mode 100644 index 00000000000..fcbe48145e7 --- /dev/null +++ b/libclc/generic/lib/atomic/atomic_xor.cl @@ -0,0 +1,12 @@ +#include <clc/clc.h> + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \ + return __sync_fetch_and_xor(p, val); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL |