diff options
| author | Aaron Watry <awatry@gmail.com> | 2017-09-09 02:23:54 +0000 |
|---|---|---|
| committer | Aaron Watry <awatry@gmail.com> | 2017-09-09 02:23:54 +0000 |
| commit | 415a60f3035bdf36d2a93003995a50d8ef68eacb (patch) | |
| tree | baa8fed5cd2c14c5dc543ac3d8f95c8e616cedb2 /libclc/generic/include/clc/integer | |
| parent | 628fbcae4cc5e7d6076e104ad56b89b929b6f9fd (diff) | |
| download | bcm5719-llvm-415a60f3035bdf36d2a93003995a50d8ef68eacb.tar.gz bcm5719-llvm-415a60f3035bdf36d2a93003995a50d8ef68eacb.zip | |
integer: Add popcount implementation using ctpop intrinsic
Also copy/modify the unary_intrin.inc from math/ to make the
intrinsic declaration somewhat reusable.
Passes CL CTS integer_ops/test_integer_ops popcount tests for CL 1.2
Tested on GCN 1.0 (Pitcairn)
Signed-off-by: Aaron Watry <awatry@gmail.com>
Reviewed-by: Jan Vesely <jan.vesely@rutgers.edu>
llvm-svn: 312854
Diffstat (limited to 'libclc/generic/include/clc/integer')
| -rw-r--r-- | libclc/generic/include/clc/integer/popcount.h | 6 | ||||
| -rw-r--r-- | libclc/generic/include/clc/integer/unary_intrin.inc | 20 |
2 files changed, 26 insertions, 0 deletions
diff --git a/libclc/generic/include/clc/integer/popcount.h b/libclc/generic/include/clc/integer/popcount.h
new file mode 100644
index 00000000000..99e402171de
--- /dev/null
+++ b/libclc/generic/include/clc/integer/popcount.h
@@ -0,0 +1,6 @@
+#undef popcount
+#define popcount __clc_popcount
+
+#define __CLC_FUNCTION __clc_popcount
+#define __CLC_INTRINSIC "llvm.ctpop"
+#include <clc/integer/unary_intrin.inc>
diff --git a/libclc/generic/include/clc/integer/unary_intrin.inc b/libclc/generic/include/clc/integer/unary_intrin.inc
new file mode 100644
index 00000000000..ee9862a4c5b
--- /dev/null
+++ b/libclc/generic/include/clc/integer/unary_intrin.inc
@@ -0,0 +1,20 @@
+#define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \
+_CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm(__CLC_INTRINSIC ".i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##2 __CLC_FUNCTION(SCALAR_TYPE##2 x) __asm(__CLC_INTRINSIC ".v2i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) __asm(__CLC_INTRINSIC ".v3i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) __asm(__CLC_INTRINSIC ".v4i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) __asm(__CLC_INTRINSIC ".v8i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) __asm(__CLC_INTRINSIC ".v16i" BIT_SIZE);
+
+__CLC_INTRINSIC_DEF(char, "8")
+__CLC_INTRINSIC_DEF(uchar, "8")
+__CLC_INTRINSIC_DEF(short, "16")
+__CLC_INTRINSIC_DEF(ushort, "16")
+__CLC_INTRINSIC_DEF(int, "32")
+__CLC_INTRINSIC_DEF(uint, "32")
+__CLC_INTRINSIC_DEF(long, "64")
+__CLC_INTRINSIC_DEF(ulong, "64")
+
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
+#undef __CLC_INTRINSIC_DEF

