[X86] Add subvector insert and extract builtins to enable target feature checking and immediate range checking.

Test changes are due to differences in how we generate undef elements now. We also changed the types used for extractf128_si256/insertf128_si256 to match the signature of the builtin that previously existed which this patch resurrects. This also matches gcc. llvm-svn: 334261
author: Craig Topper <craig.topper@intel.com> 2018-06-08 03:24:47 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-06-08 03:24:47 +0000
commit: 3428beeb2f7753d98572eb04ccda4fb59b0b0af4 (patch)
tree: 37a2ba731c313982eeb55517709d815266f1f545 /clang/lib/Headers/avxintrin.h
parent: 010edd37f82270a71129a6cea30d01722c0ead35 (diff)
download: bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.tar.gz
bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.zip
1 files changed, 9 insertions, 42 deletions
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 7c85893ba13..6c42132cf44 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -4613,17 +4613,8 @@ _mm256_zextsi128_si256(__m128i __a)
 ///    result.
 /// \returns A 256-bit vector of [8 x float] containing the interleaved values.
 #define _mm256_insertf128_ps(V1, V2, M) \
-  (__m256)__builtin_shufflevector( \
-    (__v8sf)(__m256)(V1), \
-    (__v8sf)_mm256_castps128_ps256((__m128)(V2)), \
-    (((M) & 1) ?  0 :  8), \
-    (((M) & 1) ?  1 :  9), \
-    (((M) & 1) ?  2 : 10), \
-    (((M) & 1) ?  3 : 11), \
-    (((M) & 1) ?  8 :  4), \
-    (((M) & 1) ?  9 :  5), \
-    (((M) & 1) ? 10 :  6), \
-    (((M) & 1) ? 11 :  7) )
+  (__m256)__builtin_ia32_vinsertf128_ps256((__v8sf)(__m256)(V1), \
+                                           (__v4sf)(__m128)(V2), (int)(M))
 
 /// Constructs a new 256-bit vector of [4 x double] by first duplicating
 ///    a 256-bit vector of [4 x double] given in the first parameter, and then
@@ -4660,13 +4651,8 @@ _mm256_zextsi128_si256(__m128i __a)
 ///    result.
 /// \returns A 256-bit vector of [4 x double] containing the interleaved values.
 #define _mm256_insertf128_pd(V1, V2, M) \
-  (__m256d)__builtin_shufflevector( \
-    (__v4df)(__m256d)(V1), \
-    (__v4df)_mm256_castpd128_pd256((__m128d)(V2)), \
-    (((M) & 1) ? 0 : 4), \
-    (((M) & 1) ? 1 : 5), \
-    (((M) & 1) ? 4 : 2), \
-    (((M) & 1) ? 5 : 3) )
+  (__m256d)__builtin_ia32_vinsertf128_pd256((__v4df)(__m256d)(V1), \
+                                            (__v2df)(__m128d)(V2), (int)(M))
 
 /// Constructs a new 256-bit integer vector by first duplicating a
 ///    256-bit integer vector given in the first parameter, and then replacing
@@ -4703,13 +4689,8 @@ _mm256_zextsi128_si256(__m128i __a)
 ///    result.
 /// \returns A 256-bit integer vector containing the interleaved values.
 #define _mm256_insertf128_si256(V1, V2, M) \
-  (__m256i)__builtin_shufflevector( \
-    (__v4di)(__m256i)(V1), \
-    (__v4di)_mm256_castsi128_si256((__m128i)(V2)), \
-    (((M) & 1) ? 0 : 4), \
-    (((M) & 1) ? 1 : 5), \
-    (((M) & 1) ? 4 : 2), \
-    (((M) & 1) ? 5 : 3) )
+  (__m256i)__builtin_ia32_vinsertf128_si256((__v4di)(__m256i)(V1), \
+                                            (__v2di)(__m128i)(V2), (int)(M))
 
 /*
    Vector extract.
@@ -4738,13 +4719,7 @@ _mm256_zextsi128_si256(__m128i __a)
 ///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
 /// \returns A 128-bit vector of [4 x float] containing the extracted bits.
 #define _mm256_extractf128_ps(V, M) \
-  (__m128)__builtin_shufflevector( \
-    (__v8sf)(__m256)(V), \
-    (__v8sf)(_mm256_undefined_ps()), \
-    (((M) & 1) ? 4 : 0), \
-    (((M) & 1) ? 5 : 1), \
-    (((M) & 1) ? 6 : 2), \
-    (((M) & 1) ? 7 : 3) )
+  (__m128)__builtin_ia32_vextractf128_ps256((__v8sf)(__m256)(V), (int)(M))
 
 /// Extracts either the upper or the lower 128 bits from a 256-bit vector
 ///    of [4 x double], as determined by the immediate integer parameter, and
@@ -4768,11 +4743,7 @@ _mm256_zextsi128_si256(__m128i __a)
 ///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
 /// \returns A 128-bit vector of [2 x double] containing the extracted bits.
 #define _mm256_extractf128_pd(V, M) \
-  (__m128d)__builtin_shufflevector( \
-    (__v4df)(__m256d)(V), \
-    (__v4df)(_mm256_undefined_pd()), \
-    (((M) & 1) ? 2 : 0), \
-    (((M) & 1) ? 3 : 1) )
+  (__m128d)__builtin_ia32_vextractf128_pd256((__v4df)(__m256d)(V), (int)(M))
 
 /// Extracts either the upper or the lower 128 bits from a 256-bit
 ///    integer vector, as determined by the immediate integer parameter, and
@@ -4796,11 +4767,7 @@ _mm256_zextsi128_si256(__m128i __a)
 ///    If bit [0] of \a M is 1, bits [255:128] of \a V are copied to the result.
 /// \returns A 128-bit integer vector containing the extracted bits.
 #define _mm256_extractf128_si256(V, M) \
-  (__m128i)__builtin_shufflevector( \
-    (__v4di)(__m256i)(V), \
-    (__v4di)(_mm256_undefined_si256()), \
-    (((M) & 1) ? 2 : 0), \
-    (((M) & 1) ? 3 : 1) )
+  (__m128i)__builtin_ia32_vextractf128_si256((__v4di)(__m256i)(V), (int)(M))
 
 /* SIMD load ops (unaligned) */
 /// Loads two 128-bit floating-point vectors of [4 x float] from
author	Craig Topper <craig.topper@intel.com>	2018-06-08 03:24:47 +0000
committer	Craig Topper <craig.topper@intel.com>	2018-06-08 03:24:47 +0000
commit	3428beeb2f7753d98572eb04ccda4fb59b0b0af4 (patch)
tree	37a2ba731c313982eeb55517709d815266f1f545 /clang/lib/Headers/avxintrin.h
parent	010edd37f82270a71129a6cea30d01722c0ead35 (diff)
download	bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.tar.gz bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.zip