diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-07-07 17:03:32 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-07-07 17:03:32 +0000 |
| commit | 5cbeeedd27a4d766652694b9dcb1084b2d80ae2b (patch) | |
| tree | 29447a18eb09ae67e9595e4e8e4b3c6cc062cb97 /clang/lib | |
| parent | dc113dc7ed08e28c241dcf3b85055a75dff53ffd (diff) | |
| download | bcm5719-llvm-5cbeeedd27a4d766652694b9dcb1084b2d80ae2b.tar.gz bcm5719-llvm-5cbeeedd27a4d766652694b9dcb1084b2d80ae2b.zip | |
[X86] Fix various type mismatches in intrinsic headers and intrinsic tests that cause extra bitcasts to be emitted in the IR.
Found via imprecise grepping of the -O0 IR. There could still be more bugs out there.
llvm-svn: 336487
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 78 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 10 | ||||
| -rw-r--r-- | clang/lib/Headers/avxintrin.h | 8 |
3 files changed, 48 insertions, 48 deletions
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index be4ee62fc69..cbee4413be4 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3374,7 +3374,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, #define _mm512_extractf64x4_pd(A, I) \ (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ - (__v4df)_mm256_undefined_si256(), \ + (__v4df)_mm256_undefined_pd(), \ (__mmask8)-1) #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ @@ -5544,7 +5544,7 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, - (__v4sf) _mm_setzero_pd (), + (__v4sf) _mm_setzero_ps (), (__mmask8) __U, _MM_FROUND_CUR_DIRECTION); } @@ -5634,7 +5634,7 @@ _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), \ (int)(((D)<<2) | (C)), \ - (__v4sf)_mm_setzero_pd(), \ + (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) @@ -6721,24 +6721,24 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) (__v8df)_mm512_setzero_pd()) #define _mm512_shuffle_ps(A, B, M) \ - (__m512d)__builtin_shufflevector((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(B), \ - 0 + (((M) >> 0) & 0x3), \ - 0 + (((M) >> 2) & 0x3), \ - 16 + (((M) >> 4) & 0x3), \ - 16 + (((M) >> 6) & 0x3), \ - 4 + (((M) >> 0) & 0x3), \ - 4 + (((M) >> 2) & 0x3), \ - 20 + (((M) >> 4) & 0x3), \ - 20 + (((M) >> 6) & 0x3), \ - 8 + (((M) >> 0) & 0x3), \ - 8 + (((M) >> 2) & 0x3), \ - 24 + (((M) >> 4) & 0x3), \ - 24 + (((M) >> 6) & 0x3), \ - 12 + (((M) >> 0) & 0x3), \ - 12 + (((M) >> 2) & 0x3), \ - 28 + (((M) >> 4) & 0x3), \ - 28 + (((M) >> 6) & 0x3)) + (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \ + (__v16sf)(__m512)(B), \ + 0 + (((M) >> 0) & 0x3), \ + 0 + (((M) >> 2) & 0x3), \ + 16 + (((M) >> 4) & 0x3), \ + 16 + (((M) >> 6) & 0x3), \ + 4 + (((M) >> 0) & 0x3), \ + 4 + (((M) >> 2) & 0x3), \ + 20 + (((M) >> 4) & 0x3), \ + 20 + (((M) >> 6) & 0x3), \ + 8 + (((M) >> 0) & 0x3), \ + 8 + (((M) >> 2) & 0x3), \ + 24 + (((M) >> 4) & 0x3), \ + 24 + (((M) >> 6) & 0x3), \ + 12 + (((M) >> 0) & 0x3), \ + 12 + (((M) >> 2) & 0x3), \ + 28 + (((M) >> 4) & 0x3), \ + 28 + (((M) >> 6) & 0x3)) #define _mm512_mask_shuffle_ps(W, U, A, B, M) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ @@ -7651,7 +7651,7 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) (__mmask8)(mask), (int)(scale)) #define _mm512_i64gather_epi32(index, addr, scale) \ - (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_ps(), \ + (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ (int const *)(addr), \ (__v8di)(__m512i)(index), \ (__mmask8)-1, (int)(scale)) @@ -7675,7 +7675,7 @@ _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) (__mmask8)(mask), (int)(scale)) #define _mm512_i64gather_epi64(index, addr, scale) \ - (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_pd(), \ + (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ (long long const *)(addr), \ (__v8di)(__m512i)(index), (__mmask8)-1, \ (int)(scale)) @@ -7825,16 +7825,16 @@ _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) } #define _mm_fmadd_round_ss(A, B, C, R) \ - (__m128d)__builtin_ia32_vfmaddss3_mask((__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), \ - (__v2df)(__m128d)(C), (__mmask8)-1, \ - (int)(R)) + (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), \ + (__v4sf)(__m128)(C), (__mmask8)-1, \ + (int)(R)) #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ - (__m128d)__builtin_ia32_vfmaddss3_mask((__v2df)(__m128d)(W), \ - (__v2df)(__m128d)(A), \ - (__v2df)(__m128d)(B), (__mmask8)(U), \ - (int)(R)) + (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ + (__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (__mmask8)(U), \ + (int)(R)) static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) @@ -8780,7 +8780,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, - (__v8di) _mm512_setzero_pd (), + (__v8di) _mm512_setzero_si512 (), (__mmask8) __U); } @@ -8812,7 +8812,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, - (__v8di) _mm512_setzero_pd(), + (__v8di) _mm512_setzero_si512(), (__mmask8) __U); } @@ -8844,7 +8844,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, - (__v16si) _mm512_setzero_ps(), + (__v16si) _mm512_setzero_si512(), (__mmask16) __U); } @@ -8876,7 +8876,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, - (__v16si) _mm512_setzero_ps(), + (__v16si) _mm512_setzero_si512(), (__mmask16) __U); } @@ -8917,16 +8917,16 @@ _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtpslo_pd (__m512 __A) { - return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); + return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) { - return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); + return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); } static __inline__ __m512d __DEFAULT_FN_ATTRS diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index a3bb5898646..66f92abcb09 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -3582,7 +3582,7 @@ _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), - (__v4sf)_mm_setzero_pd()); + (__v4sf)_mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS @@ -6413,7 +6413,7 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ - (__v4df)(__m256)(W)) + (__v4df)(__m256d)(W)) #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ @@ -6427,7 +6427,7 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ - (__v8si)(__m256)(W)) + (__v8si)(__m256i)(W)) #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ @@ -6441,7 +6441,7 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ - (__v4di)(__m256)(W)) + (__v4di)(__m256i)(W)) #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ @@ -7872,7 +7872,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) (__v4df)_mm256_setzero_pd()) #define _mm256_permutex_epi64(X, C) \ - (__m256d)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)) + (__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)) #define _mm256_mask_permutex_epi64(W, U, X, C) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 3bf7783db36..3ba6c95604e 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -4966,7 +4966,7 @@ _mm256_set_m128 (__m128 __hi, __m128 __lo) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_set_m128d (__m128d __hi, __m128d __lo) { - return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo); + return (__m256d) __builtin_shufflevector((__v2df)__lo, (__v2df)__hi, 0, 1, 2, 3); } /// Constructs a 256-bit integer vector by concatenating two 128-bit @@ -4986,7 +4986,7 @@ _mm256_set_m128d (__m128d __hi, __m128d __lo) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set_m128i (__m128i __hi, __m128i __lo) { - return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo); + return (__m256i) __builtin_shufflevector((__v2di)__lo, (__v2di)__hi, 0, 1, 2, 3); } /// Constructs a 256-bit floating-point vector of [8 x float] by @@ -5032,7 +5032,7 @@ _mm256_setr_m128 (__m128 __lo, __m128 __hi) static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setr_m128d (__m128d __lo, __m128d __hi) { - return (__m256d)_mm256_set_m128((__m128)__hi, (__m128)__lo); + return (__m256d)_mm256_set_m128d(__hi, __lo); } /// Constructs a 256-bit integer vector by concatenating two 128-bit @@ -5053,7 +5053,7 @@ _mm256_setr_m128d (__m128d __lo, __m128d __hi) static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setr_m128i (__m128i __lo, __m128i __hi) { - return (__m256i)_mm256_set_m128((__m128)__hi, (__m128)__lo); + return (__m256i)_mm256_set_m128i(__hi, __lo); } #undef __DEFAULT_FN_ATTRS |

