diff options
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 30 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 85 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 4 | 
3 files changed, 52 insertions, 67 deletions
| diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 14bf0a78ad5..98626dcc1dd 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9889,24 +9889,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,    case X86::BI__builtin_ia32_sqrtpd256:    case X86::BI__builtin_ia32_sqrtpd:    case X86::BI__builtin_ia32_sqrtps256: -  case X86::BI__builtin_ia32_sqrtps: { -    Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); -    return Builder.CreateCall(F, {Ops[0]}); -  } -  case X86::BI__builtin_ia32_sqrtps512_mask: -  case X86::BI__builtin_ia32_sqrtpd512_mask: { -    unsigned CC = cast<llvm::ConstantInt>(Ops[3])->getZExtValue(); -    // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), -    // otherwise keep the intrinsic. -    if (CC != 4) { -      Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512_mask ? -                          Intrinsic::x86_avx512_mask_sqrt_ps_512 : -                          Intrinsic::x86_avx512_mask_sqrt_pd_512; -      return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); +  case X86::BI__builtin_ia32_sqrtps: +  case X86::BI__builtin_ia32_sqrtps512: +  case X86::BI__builtin_ia32_sqrtpd512: { +    if (Ops.size() == 2) { +      unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); +      // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), +      // otherwise keep the intrinsic. +      if (CC != 4) { +        Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ? +                            Intrinsic::x86_avx512_sqrt_ps_512 : +                            Intrinsic::x86_avx512_sqrt_pd_512; +        return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); +      }      }      Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); -    return EmitX86Select(*this, Ops[2], Builder.CreateCall(F, {Ops[0]}), -                         Ops[1]); +    return Builder.CreateCall(F, Ops[0]);    }    case X86::BI__builtin_ia32_pabsb128:    case X86::BI__builtin_ia32_pabsw128: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 534007b69fc..bfb645415e4 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -1492,89 +1492,76 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {                                               (__v8di)__W);  } +#define _mm512_sqrt_round_pd(A, R) \ +  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)) +  #define _mm512_mask_sqrt_round_pd(W, U, A, R) \ -  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ -                                         (__v8df)(__m512d)(W), (__mmask8)(U), \ -                                         (int)(R)) +  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ +                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \ +                                       (__v8df)(__m512d)(W))  #define _mm512_maskz_sqrt_round_pd(U, A, R) \ -  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ -                                         (__v8df)_mm512_setzero_pd(), \ -                                         (__mmask8)(U), (int)(R)) - -#define _mm512_sqrt_round_pd(A, R) \ -  (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ -                                         (__v8df)_mm512_undefined_pd(), \ -                                         (__mmask8)-1, (int)(R)) +  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ +                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \ +                                       (__v8df)_mm512_setzero_pd())  static  __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_sqrt_pd(__m512d __a) +_mm512_sqrt_pd(__m512d __A)  { -  return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a, -                                                (__v8df) _mm512_setzero_pd (), -                                                (__mmask8) -1, -                                                _MM_FROUND_CUR_DIRECTION); +  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, +                                           _MM_FROUND_CUR_DIRECTION);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)  { -  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, -                   (__v8df) __W, -                   (__mmask8) __U, -                   _MM_FROUND_CUR_DIRECTION); +  return (__m512d)__builtin_ia32_selectpd_512(__U, +                                              (__v8df)_mm512_sqrt_pd(__A), +                                              (__v8df)__W);  }  static __inline__ __m512d __DEFAULT_FN_ATTRS  _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)  { -  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, -                   (__v8df) -                   _mm512_setzero_pd (), -                   (__mmask8) __U, -                   _MM_FROUND_CUR_DIRECTION); +  return (__m512d)__builtin_ia32_selectpd_512(__U, +                                              (__v8df)_mm512_sqrt_pd(__A), +                                              (__v8df)_mm512_setzero_pd());  } +#define _mm512_sqrt_round_ps(A, R) \ +  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)) +  #define _mm512_mask_sqrt_round_ps(W, U, A, R) \ -  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ -                                        (__v16sf)(__m512)(W), (__mmask16)(U), \ -                                        (int)(R)) +  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ +                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ +                                      (__v16sf)(__m512)(W))  #define _mm512_maskz_sqrt_round_ps(U, A, R) \ -  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ -                                        (__v16sf)_mm512_setzero_ps(), \ -                                        (__mmask16)(U), (int)(R)) - -#define _mm512_sqrt_round_ps(A, R) \ -  (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ -                                        (__v16sf)_mm512_undefined_ps(), \ -                                        (__mmask16)-1, (int)(R)) +  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ +                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ +                                      (__v16sf)_mm512_setzero_ps())  static  __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_sqrt_ps(__m512 __a) +_mm512_sqrt_ps(__m512 __A)  { -  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a, -                                               (__v16sf) _mm512_setzero_ps (), -                                               (__mmask16) -1, -                                               _MM_FROUND_CUR_DIRECTION); +  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, +                                          _MM_FROUND_CUR_DIRECTION);  }  static  __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)  { -  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, -                                               (__v16sf) __W, -                                               (__mmask16) __U, -                                               _MM_FROUND_CUR_DIRECTION); +  return (__m512)__builtin_ia32_selectps_512(__U, +                                             (__v16sf)_mm512_sqrt_ps(__A), +                                             (__v16sf)__W);  }  static  __inline__ __m512 __DEFAULT_FN_ATTRS  _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)  { -  return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, -                                               (__v16sf) _mm512_setzero_ps (), -                                               (__mmask16) __U, -                                               _MM_FROUND_CUR_DIRECTION); +  return (__m512)__builtin_ia32_selectps_512(__U, +                                             (__v16sf)_mm512_sqrt_ps(__A), +                                             (__v16sf)_mm512_setzero_ps());  }  static  __inline__ __m512d __DEFAULT_FN_ATTRS diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 12a599b4f13..22483f8242d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2373,6 +2373,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {    case X86::BI__builtin_ia32_vcvtss2si64:    case X86::BI__builtin_ia32_vcvtss2usi32:    case X86::BI__builtin_ia32_vcvtss2usi64: +  case X86::BI__builtin_ia32_sqrtpd512: +  case X86::BI__builtin_ia32_sqrtps512:      ArgNum = 1;      HasRC = true;      break; @@ -2404,8 +2406,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {    case X86::BI__builtin_ia32_cvtqq2ps512_mask:    case X86::BI__builtin_ia32_cvtuqq2pd512_mask:    case X86::BI__builtin_ia32_cvtuqq2ps512_mask: -  case X86::BI__builtin_ia32_sqrtpd512_mask: -  case X86::BI__builtin_ia32_sqrtps512_mask:      ArgNum = 3;      HasRC = true;      break; | 

