diff options
| -rw-r--r-- | clang/include/clang/Basic/BuiltinsX86.def | 6 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 30 | ||||
| -rw-r--r-- | clang/lib/Headers/avxintrin.h | 20 | ||||
| -rw-r--r-- | clang/lib/Headers/emmintrin.h | 5 | ||||
| -rw-r--r-- | clang/lib/Headers/xmmintrin.h | 7 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 6 | 
6 files changed, 50 insertions, 24 deletions
| diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 3466e9b0358..0354945d98f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -316,6 +316,7 @@ TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "nc", "sse")  TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "nc", "sse")  TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "nc", "sse")  TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "nc", "sse") +TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "nc", "sse")  TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "n", "sse2")  TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "nc", "sse2") @@ -327,6 +328,7 @@ TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "nc", "sse2")  TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2")  TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2")  TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "nc", "sse2")  TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2")  TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2")  TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2") @@ -487,6 +489,8 @@ TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "nc", "avx")  TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "nc", "avx")  TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx")  TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "nc", "avx")  TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx")  TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "nc", "avx")  TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "nc", "avx") @@ -1536,6 +1540,8 @@ TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "nc", 
"avx512f")  TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "nc", "avx512f")  TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "nc", "avx512f")  TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "nc", "avx512f")  TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "nc", "avx512vl")  TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "nc", "avx512vl")  TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "nc", "avx512vl") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 741f36b095d..9d9a9427edb 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9403,6 +9403,36 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,                                         makeArrayRef(Indices, NumElts),                                         "permil");    } +  case X86::BI__builtin_ia32_shufpd: +  case X86::BI__builtin_ia32_shufpd256: +  case X86::BI__builtin_ia32_shufpd512: +  case X86::BI__builtin_ia32_shufps: +  case X86::BI__builtin_ia32_shufps256: +  case X86::BI__builtin_ia32_shufps512: { +    uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); +    llvm::Type *Ty = Ops[0]->getType(); +    unsigned NumElts = Ty->getVectorNumElements(); +    unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; +    unsigned NumLaneElts = NumElts / NumLanes; + +    // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
+    Imm = (Imm & 0xff) * 0x01010101; + +    uint32_t Indices[16]; +    for (unsigned l = 0; l != NumElts; l += NumLaneElts) { +      for (unsigned i = 0; i != NumLaneElts; ++i) { +        unsigned Index = Imm % NumLaneElts; +        Imm /= NumLaneElts; +        if (i >= (NumLaneElts / 2)) +          Index += NumElts; +        Indices[l + i] = l + Index; +      } +    } + +    return Builder.CreateShuffleVector(Ops[0], Ops[1], +                                       makeArrayRef(Indices, NumElts), +                                       "shufp"); +  }    case X86::BI__builtin_ia32_palignr128:    case X86::BI__builtin_ia32_palignr256:    case X86::BI__builtin_ia32_palignr512: { diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 0b7813526ea..2aa3f5f6d6c 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -1516,16 +1516,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)  ///    11: Bits [127:96] and [255:224] are copied from the selected operand.  /// \returns A 256-bit vector of [8 x float] containing the shuffled values.  
#define _mm256_shuffle_ps(a, b, mask) \ -  (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \ -                                  (__v8sf)(__m256)(b), \ -                                  0  + (((mask) >> 0) & 0x3), \ -                                  0  + (((mask) >> 2) & 0x3), \ -                                  8  + (((mask) >> 4) & 0x3), \ -                                  8  + (((mask) >> 6) & 0x3), \ -                                  4  + (((mask) >> 0) & 0x3), \ -                                  4  + (((mask) >> 2) & 0x3), \ -                                  12 + (((mask) >> 4) & 0x3), \ -                                  12 + (((mask) >> 6) & 0x3)) +  (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \ +                                   (__v8sf)(__m256)(b), (int)(mask))  /// Selects four double-precision values from the 256-bit operands of  ///    [4 x double], as specified by the immediate value operand. @@ -1570,12 +1562,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)  ///    destination.  /// \returns A 256-bit vector of [4 x double] containing the shuffled values.  
#define _mm256_shuffle_pd(a, b, mask) \ -  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \ -                                   (__v4df)(__m256d)(b), \ -                                   0 + (((mask) >> 0) & 0x1), \ -                                   4 + (((mask) >> 1) & 0x1), \ -                                   2 + (((mask) >> 2) & 0x1), \ -                                   6 + (((mask) >> 3) & 0x1)) +  (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \ +                                    (__v4df)(__m256d)(b), (int)(mask))  /* Compare */  #define _CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */ diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 8942790d5c5..6c64d41517a 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4757,9 +4757,8 @@ _mm_movemask_pd(__m128d __a)  ///    Bit[1] = 1: upper element of \a b copied to upper element of result. \n  /// \returns A 128-bit vector of [2 x double] containing the shuffled values.  #define _mm_shuffle_pd(a, b, i) \ -  (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ -                                   0 + (((i) >> 0) & 0x1), \ -                                   2 + (((i) >> 1) & 0x1)) +  (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \ +                                 (int)(i))  /// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit  ///    floating-point vector of [4 x float]. diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index 92db4779bea..8d82da3ce53 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2605,11 +2605,8 @@ void _mm_setcsr(unsigned int __i);  ///    11: Bits [127:96] copied from the specified operand.  /// \returns A 128-bit vector of [4 x float] containing the shuffled values.  
#define _mm_shuffle_ps(a, b, mask) \ -  (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ -                                  0 + (((mask) >> 0) & 0x3), \ -                                  0 + (((mask) >> 2) & 0x3), \ -                                  4 + (((mask) >> 4) & 0x3), \ -                                  4 + (((mask) >> 6) & 0x3)) +  (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \ +                                (int)(mask))  /// Unpacks the high-order (index 2,3) values from two 128-bit vectors of  ///    [4 x float] and interleaves them into a 128-bit vector of [4 x float]. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 020d82edf72..48671b44326 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2654,6 +2654,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {      break;    case X86::BI__builtin_ia32_sha1rnds4:    case X86::BI__builtin_ia32_blendpd: +  case X86::BI__builtin_ia32_shufpd:    case X86::BI__builtin_ia32_vec_set_v4hi:    case X86::BI__builtin_ia32_vec_set_v4si:    case X86::BI__builtin_ia32_vec_set_v4di: @@ -2721,6 +2722,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {    case X86::BI__builtin_ia32_pblendd128:    case X86::BI__builtin_ia32_blendps:    case X86::BI__builtin_ia32_blendpd256: +  case X86::BI__builtin_ia32_shufpd256:    case X86::BI__builtin_ia32_roundss:    case X86::BI__builtin_ia32_roundsd:    case X86::BI__builtin_ia32_rangepd128_mask: @@ -2824,6 +2826,10 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {    case X86::BI__builtin_ia32_shuf_f64x2:    case X86::BI__builtin_ia32_shuf_i32x4:    case X86::BI__builtin_ia32_shuf_i64x2: +  case X86::BI__builtin_ia32_shufpd512: +  case X86::BI__builtin_ia32_shufps: +  case X86::BI__builtin_ia32_shufps256: +  case X86::BI__builtin_ia32_shufps512:    case 
X86::BI__builtin_ia32_dbpsadbw128_mask:    case X86::BI__builtin_ia32_dbpsadbw256_mask:    case X86::BI__builtin_ia32_dbpsadbw512_mask: | 

