diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-06-08 03:24:47 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-06-08 03:24:47 +0000 |
| commit | 3428beeb2f7753d98572eb04ccda4fb59b0b0af4 (patch) | |
| tree | 37a2ba731c313982eeb55517709d815266f1f545 /clang/lib/Headers/avx512vlintrin.h | |
| parent | 010edd37f82270a71129a6cea30d01722c0ead35 (diff) | |
| download | bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.tar.gz bcm5719-llvm-3428beeb2f7753d98572eb04ccda4fb59b0b0af4.zip | |
[X86] Add subvector insert and extract builtins to enable target feature checking and immediate range checking.
Test changes are due to differences in how we generate undef elements now. We also changed the types used for extractf128_si256/insertf128_si256 to match the signature of the builtin that previously existed which this patch resurrects. This also matches gcc.
llvm-svn: 334261
Diffstat (limited to 'clang/lib/Headers/avx512vlintrin.h')
| -rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 38 |
1 files changed, 6 insertions, 32 deletions
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 49198ec5336..d1e81528cde 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -7699,12 +7699,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) } #define _mm256_extractf32x4_ps(A, imm) \ - (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_undefined_ps(), \ - ((imm) & 1) ? 4 : 0, \ - ((imm) & 1) ? 5 : 1, \ - ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3) + (__m128)__builtin_ia32_extractf32x4_256((__v8sf)(__m256)(A), (int)(imm)) #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ @@ -7717,12 +7712,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v4sf)_mm_setzero_ps()) #define _mm256_extracti32x4_epi32(A, imm) \ - (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \ - (__v8si)_mm256_undefined_si256(), \ - ((imm) & 1) ? 4 : 0, \ - ((imm) & 1) ? 5 : 1, \ - ((imm) & 1) ? 6 : 2, \ - ((imm) & 1) ? 7 : 3) + (__m128i)__builtin_ia32_extracti32x4_256((__v8si)(__m256i)(A), (int)(imm)) #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ @@ -7735,16 +7725,8 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v4si)_mm_setzero_si128()) #define _mm256_insertf32x4(A, B, imm) \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_castps128_ps256((__m128)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7) + (__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ + (__v4sf)(__m128)(B), (int)(imm)) #define _mm256_mask_insertf32x4(W, U, A, B, imm) \ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ @@ -7757,16 +7739,8 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v8sf)_mm256_setzero_ps()) #define _mm256_inserti32x4(A, B, imm) \ - (__m256i)__builtin_shufflevector((__v8si)(__m256i)(A), \ - (__v8si)_mm256_castsi128_si256((__m128i)(B)), \ - ((imm) & 0x1) ? 0 : 8, \ - ((imm) & 0x1) ? 1 : 9, \ - ((imm) & 0x1) ? 2 : 10, \ - ((imm) & 0x1) ? 3 : 11, \ - ((imm) & 0x1) ? 8 : 4, \ - ((imm) & 0x1) ? 9 : 5, \ - ((imm) & 0x1) ? 10 : 6, \ - ((imm) & 0x1) ? 11 : 7) + (__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \ + (__v4si)(__m128i)(B), (int)(imm)) #define _mm256_mask_inserti32x4(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ |

