diff options
-rw-r--r-- | libclc/generic/include/clc/shared/vstore.h | 41 | ||||
-rw-r--r-- | libclc/generic/lib/shared/vstore.cl | 30 | ||||
-rw-r--r-- | libclc/generic/lib/shared/vstore_half.inc | 21 |
3 files changed, 59 insertions, 33 deletions
diff --git a/libclc/generic/include/clc/shared/vstore.h b/libclc/generic/include/clc/shared/vstore.h index 0e3f694b90c..a246d52f205 100644 --- a/libclc/generic/include/clc/shared/vstore.h +++ b/libclc/generic/include/clc/shared/vstore.h @@ -16,37 +16,52 @@ #define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \ _CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE) \ -#define _CLC_VECTOR_VSTORE_PRIM() \ - _CLC_VECTOR_VSTORE_PRIM1(char) \ - _CLC_VECTOR_VSTORE_PRIM1(uchar) \ - _CLC_VECTOR_VSTORE_PRIM1(short) \ - _CLC_VECTOR_VSTORE_PRIM1(ushort) \ - _CLC_VECTOR_VSTORE_PRIM1(int) \ - _CLC_VECTOR_VSTORE_PRIM1(uint) \ - _CLC_VECTOR_VSTORE_PRIM1(long) \ - _CLC_VECTOR_VSTORE_PRIM1(ulong) \ - _CLC_VECTOR_VSTORE_PRIM1(float) \ - _CLC_VECTOR_VSTORE_PRIM3(_half, half, float) +_CLC_VECTOR_VSTORE_PRIM1(char) +_CLC_VECTOR_VSTORE_PRIM1(uchar) +_CLC_VECTOR_VSTORE_PRIM1(short) +_CLC_VECTOR_VSTORE_PRIM1(ushort) +_CLC_VECTOR_VSTORE_PRIM1(int) +_CLC_VECTOR_VSTORE_PRIM1(uint) +_CLC_VECTOR_VSTORE_PRIM1(long) +_CLC_VECTOR_VSTORE_PRIM1(ulong) +_CLC_VECTOR_VSTORE_PRIM1(float) +_CLC_VECTOR_VSTORE_PRIM3(_half, half, float) +// Use suffix to declare aligned vstorea_halfN +_CLC_VECTOR_VSTORE_PRIM3(a_half, half, float) #ifdef cl_khr_fp64 _CLC_VECTOR_VSTORE_PRIM1(double) _CLC_VECTOR_VSTORE_PRIM3(_half, half, double) + // Use suffix to declare aligned vstorea_halfN + _CLC_VECTOR_VSTORE_PRIM3(a_half, half, double) + + // Scalar vstore_half also needs to be declared _CLC_VSTORE_DECL(_half, half, double, , __private) _CLC_VSTORE_DECL(_half, half, double, , __local) _CLC_VSTORE_DECL(_half, half, double, , __global) + + // Scalar vstorea_half is not part of the specs but CTS expects it + _CLC_VSTORE_DECL(a_half, half, double, , __private) + _CLC_VSTORE_DECL(a_half, half, double, , __local) + _CLC_VSTORE_DECL(a_half, half, double, , __global) #endif #ifdef cl_khr_fp16 _CLC_VECTOR_VSTORE_PRIM1(half) #endif -_CLC_VECTOR_VSTORE_PRIM() +// Scalar vstore_half also needs to be declared _CLC_VSTORE_DECL(_half, half, float, , __private) _CLC_VSTORE_DECL(_half, half, float, , __local) _CLC_VSTORE_DECL(_half, half, float, , __global) +// Scalar vstorea_half is not part of the specs but CTS expects it +_CLC_VSTORE_DECL(a_half, half, float, , __private) +_CLC_VSTORE_DECL(a_half, half, float, , __local) +_CLC_VSTORE_DECL(a_half, half, float, , __global) + + #undef _CLC_VSTORE_DECL #undef _CLC_VECTOR_VSTORE_DECL #undef _CLC_VECTOR_VSTORE_PRIM3 #undef _CLC_VECTOR_VSTORE_PRIM1 -#undef _CLC_VECTOR_VSTORE_PRIM diff --git a/libclc/generic/lib/shared/vstore.cl b/libclc/generic/lib/shared/vstore.cl index 3343c1609bc..e5383a80d5b 100644 --- a/libclc/generic/lib/shared/vstore.cl +++ b/libclc/generic/lib/shared/vstore.cl @@ -33,23 +33,22 @@ VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \ VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \ -#define VSTORE_TYPES() \ - VSTORE_ADDR_SPACES(char) \ - VSTORE_ADDR_SPACES(uchar) \ - VSTORE_ADDR_SPACES(short) \ - VSTORE_ADDR_SPACES(ushort) \ - VSTORE_ADDR_SPACES(int) \ - VSTORE_ADDR_SPACES(uint) \ - VSTORE_ADDR_SPACES(long) \ - VSTORE_ADDR_SPACES(ulong) \ - VSTORE_ADDR_SPACES(float) \ +VSTORE_ADDR_SPACES(char) +VSTORE_ADDR_SPACES(uchar) +VSTORE_ADDR_SPACES(short) +VSTORE_ADDR_SPACES(ushort) +VSTORE_ADDR_SPACES(int) +VSTORE_ADDR_SPACES(uint) +VSTORE_ADDR_SPACES(long) +VSTORE_ADDR_SPACES(ulong) +VSTORE_ADDR_SPACES(float) -VSTORE_TYPES() #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable VSTORE_ADDR_SPACES(double) #endif + #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable VSTORE_ADDR_SPACES(half) @@ -95,13 +94,17 @@ DECLARE_HELPER(double, __local, __builtin_store_half); VEC_STORE8(STYPE, AS, val.lo) \ VEC_STORE8(STYPE, AS, val.hi) -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \ +#define __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \ _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \ offset *= VEC_SIZE; \ VEC_STORE##VEC_SIZE(STYPE, AS, vec) \ + } \ + _CLC_OVERLOAD _CLC_DEF void vstorea_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \ + offset *= OFFSET; \ + VEC_STORE##VEC_SIZE(STYPE, AS, vec) \ } -#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) +#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) #define __CLC_BODY "vstore_half.inc" #include <clc/math/gentype.inc> @@ -115,6 +118,5 @@ DECLARE_HELPER(double, __local, __builtin_store_half); #undef VEC_LOAD2 #undef VEC_LOAD1 #undef DECLARE_HELPER -#undef VSTORE_TYPES #undef VSTORE_ADDR_SPACES #undef VSTORE_VECTORIZE diff --git a/libclc/generic/lib/shared/vstore_half.inc b/libclc/generic/lib/shared/vstore_half.inc index fee52bc9c23..ee4e38b4c6c 100644 --- a/libclc/generic/lib/shared/vstore_half.inc +++ b/libclc/generic/lib/shared/vstore_half.inc @@ -1,10 +1,19 @@ #ifdef __CLC_VECSIZE - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private); - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local); - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global); + +#if __CLC_VECSIZE == 3 +# define __CLC_OFFSET 4 +#else +# define __CLC_OFFSET __CLC_VECSIZE +#endif + + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private); + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local); + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global); + +#undef __CLC_OFFSET #else - FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private); - FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local); - FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global); + FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private); + FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local); + FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global); #endif |