diff options
author | Craig Topper <craig.topper@intel.com> | 2018-05-30 05:26:04 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-05-30 05:26:04 +0000 |
commit | f6e79c6d3fcd14f35351d99b5060614f328ab21b (patch) | |
tree | c9ddc861a738cbb2d4d459dc523e1361b94cc24c /clang/lib | |
parent | cc0741e59f854777144eca819c04d92a3bbf7ecb (diff) | |
download | bcm5719-llvm-f6e79c6d3fcd14f35351d99b5060614f328ab21b.tar.gz bcm5719-llvm-f6e79c6d3fcd14f35351d99b5060614f328ab21b.zip |
[X86] Remove masking from the AVX512VNNI builtins. Use a select in IR instead.
llvm-svn: 333509
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/Headers/avx512vlvnniintrin.h | 189 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vnniintrin.h | 97 |
2 files changed, 118 insertions, 168 deletions
diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h index 40cbbf29964..5be1aa5319e 100644 --- a/clang/lib/Headers/avx512vlvnniintrin.h +++ b/clang/lib/Headers/avx512vlvnniintrin.h @@ -33,222 +33,189 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, + (__v8si)__B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), + (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), + (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) +_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); + return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, + (__v8si)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), + (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_dpbusds_epi32(__mmask8 __U, 
__m256i __S, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); -} + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), + (__v8si)_mm256_setzero_si256()); +} static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) +_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); + return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, + (__v8si)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), + (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), + (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) +_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); + return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, + (__v8si)__B); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) 
__B, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), + (__v8si)__S); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { - return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256(__U, + (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), + (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S, - (__v8si) __A, - (__v8si) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, + (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpbusd_epi32(__S, __A, __B), + (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpbusd_epi32(__S, __A, __B), + (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, 
(__v4si)__A, + (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpbusds_epi32(__S, __A, __B), + (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpbusds_epi32(__S, __A, __B), + (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, + (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpwssd_epi32(__S, __A, __B), + (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpwssd_epi32(__S, __A, __B), + (__v4si)_mm_setzero_si128()); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) +_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i 
__B) { - return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); + return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, + (__v4si)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpwssds_epi32(__S, __A, __B), + (__v4si)__S); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128(__U, + (__v4si)_mm_dpwssds_epi32(__S, __A, __B), + (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S, - (__v4si) __A, - (__v4si) __B, - (__mmask8) -1); -} - - #undef __DEFAULT_FN_ATTRS #endif diff --git a/clang/lib/Headers/avx512vnniintrin.h b/clang/lib/Headers/avx512vnniintrin.h index 0c6badd231a..410e1ee59e2 100644 --- a/clang/lib/Headers/avx512vnniintrin.h +++ b/clang/lib/Headers/avx512vnniintrin.h @@ -33,114 +33,97 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) +{ + return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), + (__v16si)__S); 
} static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpbusd_epi32(__S, __A, __B), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B) +_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A, + (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), + (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpbusds_epi32(__S, __A, __B), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B) +_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A, + (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, 
__m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), + (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpwssd_epi32(__S, __A, __B), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B) +_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A, + (__v16si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), + (__v16si)__S); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) { - return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) __U); -} - -static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B) -{ - return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S, - (__v16si) __A, - (__v16si) __B, - (__mmask16) -1); + return (__m512i)__builtin_ia32_selectd_512(__U, + (__v16si)_mm512_dpwssds_epi32(__S, __A, __B), + 
(__v16si)_mm512_setzero_si512()); } - #undef __DEFAULT_FN_ATTRS #endif |