summary refs log tree commit diff stats
path: root/clang/lib
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2018-05-30 05:26:04 +0000
committer Craig Topper <craig.topper@intel.com> 2018-05-30 05:26:04 +0000
commit f6e79c6d3fcd14f35351d99b5060614f328ab21b (patch)
tree c9ddc861a738cbb2d4d459dc523e1361b94cc24c /clang/lib
parent cc0741e59f854777144eca819c04d92a3bbf7ecb (diff)
download bcm5719-llvm-f6e79c6d3fcd14f35351d99b5060614f328ab21b.tar.gz
bcm5719-llvm-f6e79c6d3fcd14f35351d99b5060614f328ab21b.zip
[X86] Remove masking from the AVX512VNNI builtins. Use a select in IR instead.
llvm-svn: 333509
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/Headers/avx512vlvnniintrin.h 189
-rw-r--r-- clang/lib/Headers/avx512vnniintrin.h 97
2 files changed, 118 insertions, 168 deletions
diff --git a/clang/lib/Headers/avx512vlvnniintrin.h b/clang/lib/Headers/avx512vlvnniintrin.h
index 40cbbf29964..5be1aa5319e 100644
--- a/clang/lib/Headers/avx512vlvnniintrin.h
+++ b/clang/lib/Headers/avx512vlvnniintrin.h
@@ -33,222 +33,189 @@
static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
+{
+ return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A,
+ (__v8si)__B);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
+ (__v8si)__S);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusd256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
+ (__v8si)_mm256_setzero_si256());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
+_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A,
+ (__v8si)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
+ (__v8si)__S);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusds256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
-}
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
+ (__v8si)_mm256_setzero_si256());
+}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
+_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpbusds256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A,
+ (__v8si)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
+ (__v8si)__S);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssd256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
+ (__v8si)_mm256_setzero_si256());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
+_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssd256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A,
+ (__v8si)__B);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
+ (__v8si)__S);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssds256_maskz ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) __U);
+ return (__m256i)__builtin_ia32_selectd_256(__U,
+ (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
+ (__v8si)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
- return (__m256i) __builtin_ia32_vpdpwssds256_mask ((__v8si) __S,
- (__v8si) __A,
- (__v8si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A,
+ (__v4si)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
+ (__v4si)__S);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusd128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
+_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A,
+ (__v4si)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
+ (__v4si)__S);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusds128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
+_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpbusds128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A,
+ (__v4si)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
+ (__v4si)__S);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssd128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
+ (__v4si)_mm_setzero_si128());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
+_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssd128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
+ return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A,
+ (__v4si)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
+ (__v4si)__S);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
{
- return (__m128i) __builtin_ia32_vpdpwssds128_maskz ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) __U);
+ return (__m128i)__builtin_ia32_selectd_128(__U,
+ (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
+ (__v4si)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
-_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
-{
- return (__m128i) __builtin_ia32_vpdpwssds128_mask ((__v4si) __S,
- (__v4si) __A,
- (__v4si) __B,
- (__mmask8) -1);
-}
-
-
#undef __DEFAULT_FN_ATTRS
#endif
diff --git a/clang/lib/Headers/avx512vnniintrin.h b/clang/lib/Headers/avx512vnniintrin.h
index 0c6badd231a..410e1ee59e2 100644
--- a/clang/lib/Headers/avx512vnniintrin.h
+++ b/clang/lib/Headers/avx512vnniintrin.h
@@ -33,114 +33,97 @@
static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
+{
+ return (__m512i)__builtin_ia32_vpdpbusd512((__v16si)__S, (__v16si)__A,
+ (__v16si)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
+ (__v16si)__S);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusd512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpbusd_epi32(__S, __A, __B),
+ (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpbusd_epi32(__m512i __S, __m512i __A, __m512i __B)
+_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_vpdpbusds512((__v16si)__S, (__v16si)__A,
+ (__v16si)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpbusds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
+ (__v16si)__S);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpbusds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusds512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpbusds_epi32(__S, __A, __B),
+ (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpbusds_epi32(__m512i __S, __m512i __A, __m512i __B)
+_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpbusds512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_vpdpwssd512((__v16si)__S, (__v16si)__A,
+ (__v16si)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssd_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
+ (__v16si)__S);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssd_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssd512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpwssd_epi32(__S, __A, __B),
+ (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpwssd_epi32(__m512i __S, __m512i __A, __m512i __B)
+_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssd512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_vpdpwssds512((__v16si)__S, (__v16si)__A,
+ (__v16si)__B);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_dpwssds_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
+ (__v16si)__S);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_dpwssds_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
{
- return (__m512i) __builtin_ia32_vpdpwssds512_maskz ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_dpwssds_epi32(__m512i __S, __m512i __A, __m512i __B)
-{
- return (__m512i) __builtin_ia32_vpdpwssds512_mask ((__v16si) __S,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) -1);
+ return (__m512i)__builtin_ia32_selectd_512(__U,
+ (__v16si)_mm512_dpwssds_epi32(__S, __A, __B),
+ (__v16si)_mm512_setzero_si512());
}
-
#undef __DEFAULT_FN_ATTRS
#endif
OpenPOWER on IntegriCloud