summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp30
-rw-r--r--clang/lib/Headers/avx512fintrin.h85
-rw-r--r--clang/lib/Sema/SemaChecking.cpp4
3 files changed, 52 insertions, 67 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 14bf0a78ad5..98626dcc1dd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9889,24 +9889,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_sqrtpd256:
case X86::BI__builtin_ia32_sqrtpd:
case X86::BI__builtin_ia32_sqrtps256:
- case X86::BI__builtin_ia32_sqrtps: {
- Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
- return Builder.CreateCall(F, {Ops[0]});
- }
- case X86::BI__builtin_ia32_sqrtps512_mask:
- case X86::BI__builtin_ia32_sqrtpd512_mask: {
- unsigned CC = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
- // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
- // otherwise keep the intrinsic.
- if (CC != 4) {
- Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512_mask ?
- Intrinsic::x86_avx512_mask_sqrt_ps_512 :
- Intrinsic::x86_avx512_mask_sqrt_pd_512;
- return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ case X86::BI__builtin_ia32_sqrtps:
+ case X86::BI__builtin_ia32_sqrtps512:
+ case X86::BI__builtin_ia32_sqrtpd512: {
+ if (Ops.size() == 2) {
+ unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
+ // otherwise keep the intrinsic.
+ if (CC != 4) {
+ Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ?
+ Intrinsic::x86_avx512_sqrt_ps_512 :
+ Intrinsic::x86_avx512_sqrt_pd_512;
+ return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ }
}
Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
- return EmitX86Select(*this, Ops[2], Builder.CreateCall(F, {Ops[0]}),
- Ops[1]);
+ return Builder.CreateCall(F, Ops[0]);
}
case X86::BI__builtin_ia32_pabsb128:
case X86::BI__builtin_ia32_pabsw128:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 534007b69fc..bfb645415e4 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1492,89 +1492,76 @@ _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
(__v8di)__W);
}
+#define _mm512_sqrt_round_pd(A, R) \
+ (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))
+
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
- (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(W), (__mmask8)(U), \
- (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_sqrt_round_pd((A), (R)), \
+ (__v8df)(__m512d)(W))
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
- (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U), (int)(R))
-
-#define _mm512_sqrt_round_pd(A, R) \
- (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1, (int)(R))
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_sqrt_round_pd((A), (R)), \
+ (__v8df)_mm512_setzero_pd())
static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_sqrt_pd(__m512d __a)
+_mm512_sqrt_pd(__m512d __A)
{
- return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a,
- (__v8df) _mm512_setzero_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
- return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
- (__v8df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(__U,
+ (__v8df)_mm512_sqrt_pd(__A),
+ (__v8df)__W);
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
- return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512d)__builtin_ia32_selectpd_512(__U,
+ (__v8df)_mm512_sqrt_pd(__A),
+ (__v8df)_mm512_setzero_pd());
}
+#define _mm512_sqrt_round_ps(A, R) \
+ (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))
+
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
- (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(W), (__mmask16)(U), \
- (int)(R))
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
+ (__v16sf)(__m512)(W))
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
- (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U), (int)(R))
-
-#define _mm512_sqrt_round_ps(A, R) \
- (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1, (int)(R))
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
+ (__v16sf)_mm512_setzero_ps())
static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_sqrt_ps(__m512 __a)
+_mm512_sqrt_ps(__m512 __A)
{
- return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a,
- (__v16sf) _mm512_setzero_ps (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
{
- return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
- (__v16sf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512(__U,
+ (__v16sf)_mm512_sqrt_ps(__A),
+ (__v16sf)__W);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
{
- return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A,
- (__v16sf) _mm512_setzero_ps (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512)__builtin_ia32_selectps_512(__U,
+ (__v16sf)_mm512_sqrt_ps(__A),
+ (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512d __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 12a599b4f13..22483f8242d 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2373,6 +2373,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvtss2si64:
case X86::BI__builtin_ia32_vcvtss2usi32:
case X86::BI__builtin_ia32_vcvtss2usi64:
+ case X86::BI__builtin_ia32_sqrtpd512:
+ case X86::BI__builtin_ia32_sqrtps512:
ArgNum = 1;
HasRC = true;
break;
@@ -2404,8 +2406,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_cvtqq2ps512_mask:
case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
- case X86::BI__builtin_ia32_sqrtpd512_mask:
- case X86::BI__builtin_ia32_sqrtps512_mask:
ArgNum = 3;
HasRC = true;
break;
OpenPOWER on IntegriCloud