summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def6
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp30
-rw-r--r--clang/lib/Headers/avxintrin.h20
-rw-r--r--clang/lib/Headers/emmintrin.h5
-rw-r--r--clang/lib/Headers/xmmintrin.h7
-rw-r--r--clang/lib/Sema/SemaChecking.cpp6
6 files changed, 50 insertions, 24 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 3466e9b0358..0354945d98f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -316,6 +316,7 @@ TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "nc", "sse")
TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "nc", "sse")
TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "nc", "sse")
TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "nc", "sse")
+TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "nc", "sse")
TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "n", "sse2")
TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "nc", "sse2")
@@ -327,6 +328,7 @@ TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2di", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2")
@@ -487,6 +489,8 @@ TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "nc", "avx")
@@ -1536,6 +1540,8 @@ TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "nc", "avx512vl")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 741f36b095d..9d9a9427edb 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9403,6 +9403,36 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
makeArrayRef(Indices, NumElts),
"permil");
}
+ case X86::BI__builtin_ia32_shufpd:
+ case X86::BI__builtin_ia32_shufpd256:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Index = Imm % NumLaneElts;
+ Imm /= NumLaneElts;
+ if (i >= (NumLaneElts / 2))
+ Index += NumElts;
+ Indices[l + i] = l + Index;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], Ops[1],
+ makeArrayRef(Indices, NumElts),
+ "shufp");
+ }
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512: {
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 0b7813526ea..2aa3f5f6d6c 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -1516,16 +1516,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// 11: Bits [127:96] and [255:224] are copied from the selected operand.
/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
#define _mm256_shuffle_ps(a, b, mask) \
- (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), \
- 0 + (((mask) >> 0) & 0x3), \
- 0 + (((mask) >> 2) & 0x3), \
- 8 + (((mask) >> 4) & 0x3), \
- 8 + (((mask) >> 6) & 0x3), \
- 4 + (((mask) >> 0) & 0x3), \
- 4 + (((mask) >> 2) & 0x3), \
- 12 + (((mask) >> 4) & 0x3), \
- 12 + (((mask) >> 6) & 0x3))
+ (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (int)(mask))
/// Selects four double-precision values from the 256-bit operands of
/// [4 x double], as specified by the immediate value operand.
@@ -1570,12 +1562,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
/// destination.
/// \returns A 256-bit vector of [4 x double] containing the shuffled values.
#define _mm256_shuffle_pd(a, b, mask) \
- (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), \
- 0 + (((mask) >> 0) & 0x1), \
- 4 + (((mask) >> 1) & 0x1), \
- 2 + (((mask) >> 2) & 0x1), \
- 6 + (((mask) >> 3) & 0x1))
+ (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (int)(mask))
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 8942790d5c5..6c64d41517a 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -4757,9 +4757,8 @@ _mm_movemask_pd(__m128d __a)
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
#define _mm_shuffle_pd(a, b, i) \
- (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
- 0 + (((i) >> 0) & 0x1), \
- 2 + (((i) >> 1) & 0x1))
+ (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
+ (int)(i))
/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
/// floating-point vector of [4 x float].
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 92db4779bea..8d82da3ce53 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2605,11 +2605,8 @@ void _mm_setcsr(unsigned int __i);
/// 11: Bits [127:96] copied from the specified operand.
/// \returns A 128-bit vector of [4 x float] containing the shuffled values.
#define _mm_shuffle_ps(a, b, mask) \
- (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
- 0 + (((mask) >> 0) & 0x3), \
- 0 + (((mask) >> 2) & 0x3), \
- 4 + (((mask) >> 4) & 0x3), \
- 4 + (((mask) >> 6) & 0x3))
+ (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
+ (int)(mask))
/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 020d82edf72..48671b44326 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2654,6 +2654,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
break;
case X86::BI__builtin_ia32_sha1rnds4:
case X86::BI__builtin_ia32_blendpd:
+ case X86::BI__builtin_ia32_shufpd:
case X86::BI__builtin_ia32_vec_set_v4hi:
case X86::BI__builtin_ia32_vec_set_v4si:
case X86::BI__builtin_ia32_vec_set_v4di:
@@ -2721,6 +2722,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_pblendd128:
case X86::BI__builtin_ia32_blendps:
case X86::BI__builtin_ia32_blendpd256:
+ case X86::BI__builtin_ia32_shufpd256:
case X86::BI__builtin_ia32_roundss:
case X86::BI__builtin_ia32_roundsd:
case X86::BI__builtin_ia32_rangepd128_mask:
@@ -2824,6 +2826,10 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_shuf_f64x2:
case X86::BI__builtin_ia32_shuf_i32x4:
case X86::BI__builtin_ia32_shuf_i64x2:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512:
case X86::BI__builtin_ia32_dbpsadbw128_mask:
case X86::BI__builtin_ia32_dbpsadbw256_mask:
case X86::BI__builtin_ia32_dbpsadbw512_mask:
OpenPOWER on IntegriCloud