[X86] Add subvector insert and extract builtins to enable target feature checking and immediate range checking.

Test changes are due to differences in how undef elements are now generated. We also changed the types used for extractf128_si256/insertf128_si256 to match the signature of the previously existing builtin, which this patch resurrects. This also matches gcc.

llvm-svn: 334261
author: Craig Topper <craig.topper@intel.com> 2018-06-08 03:24:47 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-06-08 03:24:47 +0000
commit: 3428beeb2f7753d98572eb04ccda4fb59b0b0af4 (patch)
tree: 37a2ba731c313982eeb55517709d815266f1f545 /clang/lib/Headers/avx512vlintrin.h
parent: 010edd37f82270a71129a6cea30d01722c0ead35 (diff)
download: bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.tar.gz
bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.zip
1 files changed, 6 insertions, 32 deletions
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 49198ec5336..d1e81528cde 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -7699,12 +7699,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 }
 
 #define _mm256_extractf32x4_ps(A, imm) \
-  (__m128)__builtin_shufflevector((__v8sf)(__m256)(A),           \
-                                  (__v8sf)_mm256_undefined_ps(), \
-                                  ((imm) & 1) ? 4 : 0,           \
-                                  ((imm) & 1) ? 5 : 1,           \
-                                  ((imm) & 1) ? 6 : 2,           \
-                                  ((imm) & 1) ? 7 : 3)
+  (__m128)__builtin_ia32_extractf32x4_256((__v8sf)(__m256)(A), (int)(imm))
 
 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
@@ -7717,12 +7712,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                    (__v4sf)_mm_setzero_ps())
 
 #define _mm256_extracti32x4_epi32(A, imm) \
-  (__m128i)__builtin_shufflevector((__v8si)(__m256)(A),              \
-                                   (__v8si)_mm256_undefined_si256(), \
-                                   ((imm) & 1) ? 4 : 0,              \
-                                   ((imm) & 1) ? 5 : 1,              \
-                                   ((imm) & 1) ? 6 : 2,              \
-                                   ((imm) & 1) ? 7 : 3)
+  (__m128i)__builtin_ia32_extracti32x4_256((__v8si)(__m256i)(A), (int)(imm))
 
 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
@@ -7735,16 +7725,8 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                 (__v4si)_mm_setzero_si128())
 
 #define _mm256_insertf32x4(A, B, imm) \
-  (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
-                                  (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
-                                  ((imm) & 0x1) ?  0 :  8, \
-                                  ((imm) & 0x1) ?  1 :  9, \
-                                  ((imm) & 0x1) ?  2 : 10, \
-                                  ((imm) & 0x1) ?  3 : 11, \
-                                  ((imm) & 0x1) ?  8 :  4, \
-                                  ((imm) & 0x1) ?  9 :  5, \
-                                  ((imm) & 0x1) ? 10 :  6, \
-                                  ((imm) & 0x1) ? 11 :  7)
+  (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
+                                         (__v4sf)(__m128)(B), (int)(imm))
 
 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
@@ -7757,16 +7739,8 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                   (__v8sf)_mm256_setzero_ps())
 
 #define _mm256_inserti32x4(A, B, imm) \
-  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(A), \
-                                 (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
-                                 ((imm) & 0x1) ?  0 :  8, \
-                                 ((imm) & 0x1) ?  1 :  9, \
-                                 ((imm) & 0x1) ?  2 : 10, \
-                                 ((imm) & 0x1) ?  3 : 11, \
-                                 ((imm) & 0x1) ?  8 :  4, \
-                                 ((imm) & 0x1) ?  9 :  5, \
-                                 ((imm) & 0x1) ? 10 :  6, \
-                                 ((imm) & 0x1) ? 11 :  7)
+  (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
+                                          (__v4si)(__m128i)(B), (int)(imm))
 
 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
author	Craig Topper <craig.topper@intel.com>	2018-06-08 03:24:47 +0000
committer	Craig Topper <craig.topper@intel.com>	2018-06-08 03:24:47 +0000
commit	3428beeb2f7753d98572eb04ccda4fb59b0b0af4 (patch)
tree	37a2ba731c313982eeb55517709d815266f1f545 /clang/lib/Headers/avx512vlintrin.h
parent	010edd37f82270a71129a6cea30d01722c0ead35 (diff)
download	bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.tar.gz bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.zip