summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2011-12-23 08:31:16 +0000
committerCraig Topper <craig.topper@gmail.com>2011-12-23 08:31:16 +0000
commit235a365d58a634014190fd2f3baa6a7319c5ee0e (patch)
treea41c7e5bdba6140e62b4022d1809ac9c160578d6
parente99b27f35e802bb3b7ef40068e75f8fb3378c103 (diff)
downloadbcm5719-llvm-235a365d58a634014190fd2f3baa6a7319c5ee0e.tar.gz
bcm5719-llvm-235a365d58a634014190fd2f3baa6a7319c5ee0e.zip
Add AVX2 multiply intrinsics.
llvm-svn: 147219
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def5
-rw-r--r--clang/lib/Headers/avx2intrin.h42
-rw-r--r--clang/test/CodeGen/avx2-builtins.c35
3 files changed, 82 insertions, 0 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index b9d5b4fa990..458c302a9ab 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -527,5 +527,10 @@ BUILTIN(__builtin_ia32_pmovzxbq256, "V4LLiV16c", "")
BUILTIN(__builtin_ia32_pmovzxwd256, "V8iV8s", "")
BUILTIN(__builtin_ia32_pmovzxwq256, "V4LLiV8s", "")
BUILTIN(__builtin_ia32_pmovzxdq256, "V4LLiV4i", "")
+BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "")
+BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "")
#undef BUILTIN
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index fe6137c47c4..a1e99156336 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -405,6 +405,48 @@ _mm256_cvtepu32_epi64(__m128i __V)
return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
}
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mul_epi32(__m256i a, __m256i b)
+{
+ return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhrs_epi16(__m256i a, __m256i b)
+{
+ return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhi_epu16(__m256i a, __m256i b)
+{
+ return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhi_epi16(__m256i a, __m256i b)
+{
+ return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mullo_epi16(__m256i a, __m256i b)
+{
+ return (__m256i)((__v16hi)a * (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mullo_epi32 (__m256i a, __m256i b)
+{
+ return (__m256i)((__v8si)a * (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mul_epu32(__m256i a, __m256i b)
+{
+ return __builtin_ia32_pmuludq256((__v8si)a, (__v8si)b);
+}
+
static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
_mm256_or_si256(__m256i a, __m256i b)
{
diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c
index 9e8f4712529..bc4a86e00d8 100644
--- a/clang/test/CodeGen/avx2-builtins.c
+++ b/clang/test/CodeGen/avx2-builtins.c
@@ -380,3 +380,38 @@ __m256 test_mm256_cvtepu32_epi64(__m128 a) {
// CHECK: @llvm.x86.avx2.pmovzxdq
return _mm256_cvtepu32_epi64(a);
}
+
+__m256 test_mm256_mul_epi32(__m256 a, __m256 b) {
+ // CHECK: @llvm.x86.avx2.pmul.dq
+ return _mm256_mul_epi32(a, b);
+}
+
+__m256 test_mm256_mulhrs_epi16(__m256 a, __m256 b) {
+ // CHECK: @llvm.x86.avx2.pmul.hr.sw
+ return _mm256_mulhrs_epi16(a, b);
+}
+
+__m256 test_mm256_mulhi_epu16(__m256 a, __m256 b) {
+ // CHECK: @llvm.x86.avx2.pmulhu.w
+ return _mm256_mulhi_epu16(a, b);
+}
+
+__m256 test_mm256_mulhi_epi16(__m256 a, __m256 b) {
+ // CHECK: @llvm.x86.avx2.pmulh.w
+ return _mm256_mulhi_epi16(a, b);
+}
+
+__m256 test_mm256_mullo_epi16(__m256 a, __m256 b) {
+ // CHECK: mul <16 x i16>
+ return _mm256_mullo_epi16(a, b);
+}
+
+__m256 test_mm256_mullo_epi32(__m256 a, __m256 b) {
+ // CHECK: mul <8 x i32>
+ return _mm256_mullo_epi32(a, b);
+}
+
+__m256 test_mm256_mul_epu32(__m256 a, __m256 b) {
+ // CHECK: @llvm.x86.avx2.pmulu.dq
+ return _mm256_mul_epu32(a, b);
+}
OpenPOWER on IntegriCloud