summary refs log tree commit diff stats
path: root/clang/lib/Headers/avxintrin.h
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2018-06-08 03:24:47 +0000
committer Craig Topper <craig.topper@intel.com> 2018-06-08 03:24:47 +0000
commit 3428beeb2f7753d98572eb04ccda4fb59b0b0af4 (patch)
tree 37a2ba731c313982eeb55517709d815266f1f545 /clang/lib/Headers/avxintrin.h
parent 010edd37f82270a71129a6cea30d01722c0ead35 (diff)
download bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.tar.gz
bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.zip
[X86] Add subvector insert and extract builtins to enable target feature checking and immediate range checking.
Test changes are due to differences in how we generate undef elements now.
We also changed the types used for extractf128_si256/insertf128_si256 to match the signature of the builtin that previously existed, which this patch resurrects. This also matches gcc.
llvm-svn: 334261
Diffstat (limited to 'clang/lib/Headers/avxintrin.h')
-rw-r--r-- clang/lib/Headers/avxintrin.h 51
1 files changed, 9 insertions, 42 deletions
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 7c85893ba13..6c42132cf44 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4613,17 +4613,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit vector of [8 x float] containing the interleaved values.
#define _mm256_insertf128_ps(V1, V2, M) \
- (__m256)__builtin_shufflevector( \
- (__v8sf)(__m256)(V1), \
- (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
- (((M) & 1) ? 0 : 8), \
- (((M) & 1) ? 1 : 9), \
- (((M) & 1) ? 2 : 10), \
- (((M) & 1) ? 3 : 11), \
- (((M) & 1) ? 8 : 4), \
- (((M) & 1) ? 9 : 5), \
- (((M) & 1) ? 10 : 6), \
- (((M) & 1) ? 11 : 7) )
+ (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \
+ (__v4sf)(__m128)(V2), (int)(M))
/// Constructs a new 256-bit vector of [4 x double] by first duplicating
/// a 256-bit vector of [4 x double] given in the first parameter, and then
@@ -4660,13 +4651,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit vector of [4 x double] containing the interleaved values.
#define _mm256_insertf128_pd(V1, V2, M) \
- (__m256d)__builtin_shufflevector( \
- (__v4df)(__m256d)(V1), \
- (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
- (((M) & 1) ? 0 : 4), \
- (((M) & 1) ? 1 : 5), \
- (((M) & 1) ? 4 : 2), \
- (((M) & 1) ? 5 : 3) )
+ (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \
+ (__v2df)(__m128d)(V2), (int)(M))
/// Constructs a new 256-bit integer vector by first duplicating a
/// 256-bit integer vector given in the first parameter, and then replacing
@@ -4703,13 +4689,8 @@ _mm256_zextsi128_si256(__m128i __a)
/// result.
/// \returns A 256-bit integer vector containing the interleaved values.
#define _mm256_insertf128_si256(V1, V2, M) \
- (__m256i)__builtin_shufflevector( \
- (__v4di)(__m256i)(V1), \
- (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
- (((M) & 1) ? 0 : 4), \
- (((M) & 1) ? 1 : 5), \
- (((M) & 1) ? 4 : 2), \
- (((M) & 1) ? 5 : 3) )
+ (__m256i)__builtin_ia32_vinsertf128_si256((__v4di)(__m256i)(V1), \
+ (__v2di)(__m128i)(V2), (int)(M))
/*
Vector extract.
@@ -4738,13 +4719,7 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [4 x float] containing the extracted bits.
#define _mm256_extractf128_ps(V, M) \
- (__m128)__builtin_shufflevector( \
- (__v8sf)(__m256)(V), \
- (__v8sf)(_mm256_undefined_ps()), \
- (((M) & 1) ? 4 : 0), \
- (((M) & 1) ? 5 : 1), \
- (((M) & 1) ? 6 : 2), \
- (((M) & 1) ? 7 : 3) )
+ (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))
/// Extracts either the upper or the lower 128 bits from a 256-bit vector
/// of [4 x double], as determined by the immediate integer parameter, and
@@ -4768,11 +4743,7 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit vector of [2 x double] containing the extracted bits.
#define _mm256_extractf128_pd(V, M) \
- (__m128d)__builtin_shufflevector( \
- (__v4df)(__m256d)(V), \
- (__v4df)(_mm256_undefined_pd()), \
- (((M) & 1) ? 2 : 0), \
- (((M) & 1) ? 3 : 1) )
+ (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))
/// Extracts either the upper or the lower 128 bits from a 256-bit
/// integer vector, as determined by the immediate integer parameter, and
@@ -4796,11 +4767,7 @@ _mm256_zextsi128_si256(__m128i __a)
/// If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
/// \returns A 128-bit integer vector containing the extracted bits.
#define _mm256_extractf128_si256(V, M) \
- (__m128i)__builtin_shufflevector( \
- (__v4di)(__m256i)(V), \
- (__v4di)(_mm256_undefined_si256()), \
- (((M) & 1) ? 2 : 0), \
- (((M) & 1) ? 3 : 1) )
+ (__m128i)__builtin_ia32_vextractf128_si256((__v4di)(__m256i)(V), (int)(M))
/* SIMD load ops (unaligned) */
/// Loads two 128-bit floating-point vectors of [4 x float] from
OpenPOWER on IntegriCloud