summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
authorAdam Nemet <anemet@apple.com>2014-08-05 17:28:23 +0000
committerAdam Nemet <anemet@apple.com>2014-08-05 17:28:23 +0000
commit5bf7baa938b316047b897d5652d41227776dbc33 (patch)
treee60d62ece4b637909f18f6644a060b6d9ea8d498 /clang/lib
parentfd2161b710805dd84f1a4874660e5aefc84812a2 (diff)
downloadbcm5719-llvm-5bf7baa938b316047b897d5652d41227776dbc33.tar.gz
bcm5719-llvm-5bf7baa938b316047b897d5652d41227776dbc33.zip
[AVX512] Add intrinsic for valignd/q
Note that similar to palignr, we could further optimize these to emit shufflevector when the shift count is <=64. This however does not change the overall design that unlike palignr we would still need the LLVM intrinsic corresponding to this instruction to handle the >64 cases. (palignr uses the psrldq intrinsic in this case.) llvm-svn: 214891
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/Headers/avx512fintrin.h20
1 files changed, 20 insertions, 0 deletions
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index c205662c164..ad92fe7b64b 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -529,6 +529,26 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
(__mmask16) -1);
}
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
+{
+ return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
+ (__v8di)__B,
+ __I,
+ (__v8di)_mm512_setzero_si512(),
+ (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
+{
+ return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
+ (__v16si)__B,
+ __I,
+ (__v16si)_mm512_setzero_si512(),
+ (__mmask16) -1);
+}
+
/* Vector Blend */
static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
OpenPOWER on IntegriCloud