diff options
author | Craig Topper <craig.topper@gmail.com> | 2011-12-23 08:31:16 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2011-12-23 08:31:16 +0000 |
commit | 235a365d58a634014190fd2f3baa6a7319c5ee0e (patch) | |
tree | a41c7e5bdba6140e62b4022d1809ac9c160578d6 | |
parent | e99b27f35e802bb3b7ef40068e75f8fb3378c103 (diff) | |
download | bcm5719-llvm-235a365d58a634014190fd2f3baa6a7319c5ee0e.tar.gz bcm5719-llvm-235a365d58a634014190fd2f3baa6a7319c5ee0e.zip |
Add AVX2 multiply intrinsics.
llvm-svn: 147219
-rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 5 | ||||
-rw-r--r-- | clang/lib/Headers/avx2intrin.h | 42 | ||||
-rw-r--r-- | clang/test/CodeGen/avx2-builtins.c | 35 |
3 files changed, 82 insertions, 0 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index b9d5b4fa990..458c302a9ab 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -527,5 +527,10 @@ BUILTIN(__builtin_ia32_pmovzxbq256, "V4LLiV16c", "")
 BUILTIN(__builtin_ia32_pmovzxwd256, "V8iV8s", "")
 BUILTIN(__builtin_ia32_pmovzxwq256, "V4LLiV8s", "")
 BUILTIN(__builtin_ia32_pmovzxdq256, "V4LLiV4i", "")
+BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "")
+BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "")
+BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "")
 
 #undef BUILTIN
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index fe6137c47c4..a1e99156336 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -405,6 +405,48 @@ _mm256_cvtepu32_epi64(__m128i __V)
   return (__m256i)__builtin_ia32_pmovzxdq256((__v4si)__V);
 }
 
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mul_epi32(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmuldq256((__v8si)a, (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhrs_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhi_epu16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mulhi_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)__builtin_ia32_pmulhw256((__v16hi)a, (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mullo_epi16(__m256i a, __m256i b)
+{
+  return (__m256i)((__v16hi)a * (__v16hi)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mullo_epi32 (__m256i a, __m256i b)
+{
+  return (__m256i)((__v8si)a * (__v8si)b);
+}
+
+static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
+_mm256_mul_epu32(__m256i a, __m256i b)
+{
+  return __builtin_ia32_pmuludq256((__v8si)a, (__v8si)b);
+}
+
 static __inline__ __m256i __attribute__((__always_inline__, __nodebug__))
 _mm256_or_si256(__m256i a, __m256i b)
 {
diff --git a/clang/test/CodeGen/avx2-builtins.c b/clang/test/CodeGen/avx2-builtins.c
index 9e8f4712529..bc4a86e00d8 100644
--- a/clang/test/CodeGen/avx2-builtins.c
+++ b/clang/test/CodeGen/avx2-builtins.c
@@ -380,3 +380,38 @@ __m256 test_mm256_cvtepu32_epi64(__m128 a) {
   // CHECK: @llvm.x86.avx2.pmovzxdq
   return _mm256_cvtepu32_epi64(a);
 }
+
+__m256 test_mm256_mul_epi32(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.pmul.dq
+  return _mm256_mul_epi32(a, b);
+}
+
+__m256 test_mm256_mulhrs_epi16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.pmul.hr.sw
+  return _mm256_mulhrs_epi16(a, b);
+}
+
+__m256 test_mm256_mulhi_epu16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.pmulhu.w
+  return _mm256_mulhi_epu16(a, b);
+}
+
+__m256 test_mm256_mulhi_epi16(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.pmulh.w
+  return _mm256_mulhi_epi16(a, b);
+}
+
+__m256 test_mm256_mullo_epi16(__m256 a, __m256 b) {
+  // CHECK: mul <16 x i16>
+  return _mm256_mullo_epi16(a, b);
+}
+
+__m256 test_mm256_mullo_epi32(__m256 a, __m256 b) {
+  // CHECK: mul <8 x i32>
+  return _mm256_mullo_epi32(a, b);
+}
+
+__m256 test_mm256_mul_epu32(__m256 a, __m256 b) {
+  // CHECK: @llvm.x86.avx2.pmulu.dq
+  return _mm256_mul_epu32(a, b);
+}