diff options
Diffstat (limited to 'clang')
| -rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 16 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 172 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 16 | ||||
| -rw-r--r-- | clang/test/CodeGen/avx512f-builtins.c | 64 | 
4 files changed, 126 insertions, 142 deletions
Patch notes:
  * BuiltinsX86.def, SemaChecking.cpp: the masked 512-bit add/sub/mul/div
    builtins (*_mask, 5 arguments) are replaced by unmasked builtins that take
    two vectors plus the rounding immediate.
  * avx512fintrin.h: the mask/maskz rounding macros now wrap the unmasked macro
    in __builtin_ia32_select{pd,ps}_512.  Macro bodies deliberately carry no
    trailing ';' so each macro stays usable inside a larger expression.
  * avx512f-builtins.c: CHECK lines expect the unmasked intrinsic followed by a
    vector select for the mask/maskz forms.

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 6be561fc0c5..fc580df0836 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1087,14 +1087,14 @@ TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "nc", "avx512bw")
 
-TARGET_BUILTIN(__builtin_ia32_addpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_addps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "nc", "avx512f")
 
 TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "nc", "avx512bw")
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9daa559bb13..a4c01019592 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -2060,40 +2060,32 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 }
 
 #define _mm512_add_round_pd(A, B, R) \
-  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)-1, (int)(R))
+  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), (int)(R))
 
 #define _mm512_mask_add_round_pd(W, U, A, B, R) \
-  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R))
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
+                                   (__v8df)(__m512d)(W))
 
 #define _mm512_maskz_add_round_pd(U, A, B, R) \
-  (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(U), (int)(R))
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
+                                   (__v8df)_mm512_setzero_pd())
 
 #define _mm512_add_round_ps(A, B, R) \
-  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)-1, (int)(R))
+  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), (int)(R))
 
 #define _mm512_mask_add_round_ps(W, U, A, B, R) \
-  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                       (int)(R))
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
+                                  (__v16sf)(__m512)(W))
 
 #define _mm512_maskz_add_round_ps(U, A, B, R) \
-  (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(U), (int)(R))
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
+                                  (__v16sf)_mm512_setzero_ps())
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2195,40 +2187,32 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 }
 
 #define _mm512_sub_round_pd(A, B, R) \
-  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)-1, (int)(R))
+  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), (int)(R))
 
 #define _mm512_mask_sub_round_pd(W, U, A, B, R) \
-  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R))
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
+                                   (__v8df)(__m512d)(W))
 
 #define _mm512_maskz_sub_round_pd(U, A, B, R) \
-  (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(U), (int)(R))
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
+                                   (__v8df)_mm512_setzero_pd())
 
 #define _mm512_sub_round_ps(A, B, R) \
-  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)-1, (int)(R))
+  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), (int)(R))
 
-#define _mm512_mask_sub_round_ps(W, U, A, B, R)  \
-  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                       (int)(R))
+#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
+                                  (__v16sf)(__m512)(W))
 
-#define _mm512_maskz_sub_round_ps(U, A, B, R)  \
-  (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(U), (int)(R))
+#define _mm512_maskz_sub_round_ps(U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
+                                  (__v16sf)_mm512_setzero_ps())
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2330,40 +2314,32 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 }
 
 #define _mm512_mul_round_pd(A, B, R) \
-  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)-1, (int)(R))
+  (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), (int)(R))
 
 #define _mm512_mask_mul_round_pd(W, U, A, B, R) \
-  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R))
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
+                                   (__v8df)(__m512d)(W))
 
 #define _mm512_maskz_mul_round_pd(U, A, B, R) \
-  (__m512d)__builtin_ia32_mulpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(U), (int)(R))
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
+                                   (__v8df)_mm512_setzero_pd())
 
 #define _mm512_mul_round_ps(A, B, R) \
-  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)-1, (int)(R))
+  (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), (int)(R))
 
-#define _mm512_mask_mul_round_ps(W, U, A, B, R)  \
-  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                       (int)(R))
+#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
+                                  (__v16sf)(__m512)(W))
 
-#define _mm512_maskz_mul_round_ps(U, A, B, R)  \
-  (__m512)__builtin_ia32_mulps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(U), (int)(R))
+#define _mm512_maskz_mul_round_ps(U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
+                                  (__v16sf)_mm512_setzero_ps())
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
@@ -2478,40 +2454,32 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 }
 
 #define _mm512_div_round_pd(A, B, R) \
-  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)-1, (int)(R))
+  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
+                                   (__v8df)(__m512d)(B), (int)(R))
 
 #define _mm512_mask_div_round_pd(W, U, A, B, R) \
-  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)(__m512d)(W), (__mmask8)(U), \
-                                        (int)(R))
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
+                                   (__v8df)(__m512d)(W))
 
 #define _mm512_maskz_div_round_pd(U, A, B, R) \
-  (__m512d)__builtin_ia32_divpd512_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(B), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(U), (int)(R))
+  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
+                                   (__v8df)_mm512_setzero_pd())
 
 #define _mm512_div_round_ps(A, B, R) \
-  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)-1, (int)(R))
+  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
+                                  (__v16sf)(__m512)(B), (int)(R))
 
-#define _mm512_mask_div_round_ps(W, U, A, B, R)  \
-  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)(__m512)(W), (__mmask16)(U), \
-                                       (int)(R))
+#define _mm512_mask_div_round_ps(W, U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
+                                  (__v16sf)(__m512)(W))
 
-#define _mm512_maskz_div_round_ps(U, A, B, R)  \
-  (__m512)__builtin_ia32_divps512_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(B), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(U), (int)(R))
+#define _mm512_maskz_div_round_ps(U, A, B, R) \
+  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
+                                  (__v16sf)_mm512_setzero_ps())
 
 #define _mm512_roundscale_ps(A, B) \
   (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index ae9c2f021fb..bf1e3a8b4d8 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2359,6 +2359,14 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
     ArgNum = 1;
     HasRC = true;
     break;
+  case X86::BI__builtin_ia32_addpd512:
+  case X86::BI__builtin_ia32_addps512:
+  case X86::BI__builtin_ia32_divpd512:
+  case X86::BI__builtin_ia32_divps512:
+  case X86::BI__builtin_ia32_mulpd512:
+  case X86::BI__builtin_ia32_mulps512:
+  case X86::BI__builtin_ia32_subpd512:
+  case X86::BI__builtin_ia32_subps512:
   case X86::BI__builtin_ia32_cvtsi2sd64:
   case X86::BI__builtin_ia32_cvtsi2ss32:
   case X86::BI__builtin_ia32_cvtsi2ss64:
@@ -2384,14 +2392,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
     ArgNum = 3;
     HasRC = true;
     break;
-  case X86::BI__builtin_ia32_addpd512_mask:
-  case X86::BI__builtin_ia32_addps512_mask:
-  case X86::BI__builtin_ia32_divpd512_mask:
-  case X86::BI__builtin_ia32_divps512_mask:
-  case X86::BI__builtin_ia32_mulpd512_mask:
-  case X86::BI__builtin_ia32_mulps512_mask:
-  case X86::BI__builtin_ia32_subpd512_mask:
-  case X86::BI__builtin_ia32_subps512_mask:
   case X86::BI__builtin_ia32_addss_round_mask:
   case X86::BI__builtin_ia32_addsd_round_mask:
   case X86::BI__builtin_ia32_divss_round_mask:
diff --git a/clang/test/CodeGen/avx512f-builtins.c b/clang/test/CodeGen/avx512f-builtins.c
index dab1c419128..89be9ae0d79 100644
--- a/clang/test/CodeGen/avx512f-builtins.c
+++ b/clang/test/CodeGen/avx512f-builtins.c
@@ -2229,17 +2229,19 @@ __m512i test_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __
 
 __m512d test_mm512_add_round_pd(__m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_add_round_pd
-  // CHECK: @llvm.x86.avx512.mask.add.pd.512
+  // CHECK: @llvm.x86.avx512.add.pd.512
   return _mm512_add_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_mask_add_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_add_round_pd
-  // CHECK: @llvm.x86.avx512.mask.add.pd.512
+  // CHECK: @llvm.x86.avx512.add.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_add_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_maskz_add_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_add_round_pd
-  // CHECK: @llvm.x86.avx512.mask.add.pd.512
+  // CHECK: @llvm.x86.avx512.add.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_add_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -2256,17 +2258,19 @@ __m512d test_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
 }
 __m512 test_mm512_add_round_ps(__m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_add_round_ps
-  // CHECK: @llvm.x86.avx512.mask.add.ps.512
+  // CHECK: @llvm.x86.avx512.add.ps.512
   return _mm512_add_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_mask_add_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_add_round_ps
-  // CHECK: @llvm.x86.avx512.mask.add.ps.512
+  // CHECK: @llvm.x86.avx512.add.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_add_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_maskz_add_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_add_round_ps
-  // CHECK: @llvm.x86.avx512.mask.add.ps.512
+  // CHECK: @llvm.x86.avx512.add.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_add_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
@@ -2333,17 +2337,19 @@ __m128d test_mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B) {
 }
 __m512d test_mm512_sub_round_pd(__m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_sub_round_pd
-  // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+  // CHECK: @llvm.x86.avx512.sub.pd.512
   return _mm512_sub_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_mask_sub_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_sub_round_pd
-  // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+  // CHECK: @llvm.x86.avx512.sub.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_sub_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_maskz_sub_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_sub_round_pd
-  // CHECK: @llvm.x86.avx512.mask.sub.pd.512
+  // CHECK: @llvm.x86.avx512.sub.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_sub_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -2360,17 +2366,19 @@ __m512d test_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
 }
 __m512 test_mm512_sub_round_ps(__m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_sub_round_ps
-  // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+  // CHECK: @llvm.x86.avx512.sub.ps.512
   return _mm512_sub_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_mask_sub_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_sub_round_ps
-  // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+  // CHECK: @llvm.x86.avx512.sub.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_sub_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_maskz_sub_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_sub_round_ps
-  // CHECK: @llvm.x86.avx512.mask.sub.ps.512
+  // CHECK: @llvm.x86.avx512.sub.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_sub_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
@@ -2437,17 +2445,19 @@ __m128d test_mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B) {
 }
 __m512d test_mm512_mul_round_pd(__m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mul_round_pd
-  // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+  // CHECK: @llvm.x86.avx512.mul.pd.512
   return _mm512_mul_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_mask_mul_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_mul_round_pd
-  // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+  // CHECK: @llvm.x86.avx512.mul.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_mul_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_maskz_mul_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_mul_round_pd
-  // CHECK: @llvm.x86.avx512.mask.mul.pd.512
+  // CHECK: @llvm.x86.avx512.mul.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_mul_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
@@ -2464,17 +2474,19 @@ __m512d test_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
 }
 __m512 test_mm512_mul_round_ps(__m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mul_round_ps
-  // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+  // CHECK: @llvm.x86.avx512.mul.ps.512
   return _mm512_mul_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_mask_mul_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_mul_round_ps
-  // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+  // CHECK: @llvm.x86.avx512.mul.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_mul_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_maskz_mul_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_mul_round_ps
-  // CHECK: @llvm.x86.avx512.mask.mul.ps.512
+  // CHECK: @llvm.x86.avx512.mul.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_mul_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
@@ -2541,17 +2553,19 @@ __m128d test_mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B) {
 }
 __m512d test_mm512_div_round_pd(__m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_div_round_pd
-  // CHECK: @llvm.x86.avx512.mask.div.pd.512
+  // CHECK: @llvm.x86.avx512.div.pd.512
   return _mm512_div_round_pd(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_mask_div_round_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_div_round_pd
-  // CHECK: @llvm.x86.avx512.mask.div.pd.512
+  // CHECK: @llvm.x86.avx512.div.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_div_round_pd(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_maskz_div_round_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_div_round_pd
-  // CHECK: @llvm.x86.avx512.mask.div.pd.512
+  // CHECK: @llvm.x86.avx512.div.pd.512
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_div_round_pd(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512d test_mm512_div_pd(__m512d __a, __m512d __b) {
@@ -2573,17 +2587,19 @@ __m512d test_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
 }
 __m512 test_mm512_div_round_ps(__m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_div_round_ps
-  // CHECK: @llvm.x86.avx512.mask.div.ps.512
+  // CHECK: @llvm.x86.avx512.div.ps.512
   return _mm512_div_round_ps(__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_mask_div_round_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_div_round_ps
-  // CHECK: @llvm.x86.avx512.mask.div.ps.512
+  // CHECK: @llvm.x86.avx512.div.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_div_round_ps(__W,__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_maskz_div_round_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_div_round_ps
-  // CHECK: @llvm.x86.avx512.mask.div.ps.512
+  // CHECK: @llvm.x86.avx512.div.ps.512
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_div_round_ps(__U,__A,__B,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 __m512 test_mm512_div_ps(__m512 __A, __m512 __B) {

