summary | refs | log | tree | commit | diff | stats
path: root/clang/lib
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2018-06-08 00:59:27 +0000
committer Craig Topper <craig.topper@intel.com> 2018-06-08 00:59:27 +0000
commit acf56019614b9092b596b7fff5995353b193d26f (patch)
tree 7bbea90c7901e077c07d40e0c4d185a695744ee5 /clang/lib
parent a2670df602c58117a5b4cea65d175eef176cd604 (diff)
download bcm5719-llvm-acf56019614b9092b596b7fff5995353b193d26f.tar.gz
bcm5719-llvm-acf56019614b9092b596b7fff5995353b193d26f.zip
[X86] Add builtins for vpermilps/pd instructions to enable target feature checking.
llvm-svn: 334256
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/CodeGen/CGBuiltin.cpp 27
-rw-r--r-- clang/lib/Headers/avx512fintrin.h 30
-rw-r--r-- clang/lib/Headers/avxintrin.h 27
-rw-r--r-- clang/lib/Sema/SemaChecking.cpp 6
4 files changed, 39 insertions, 51 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 00020584955..4331005cb35 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9256,6 +9256,33 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
makeArrayRef(Indices, NumElts),
"blend");
}
+ case X86::BI__builtin_ia32_vpermilpd:
+ case X86::BI__builtin_ia32_vpermilps:
+ case X86::BI__builtin_ia32_vpermilpd256:
+ case X86::BI__builtin_ia32_vpermilps256:
+ case X86::BI__builtin_ia32_vpermilpd512:
+ case X86::BI__builtin_ia32_vpermilps512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ Indices[i + l] = (Imm % NumLaneElts) + l;
+ Imm /= NumLaneElts;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "permil");
+ }
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512: {
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 074bf61c7ba..ccc445a6acd 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -6302,16 +6302,7 @@ _mm_cvttss_u64 (__m128 __A)
#endif
#define _mm512_permute_pd(X, C) \
- (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \
- (__v8df)_mm512_undefined_pd(), \
- 0 + (((C) >> 0) & 0x1), \
- 0 + (((C) >> 1) & 0x1), \
- 2 + (((C) >> 2) & 0x1), \
- 2 + (((C) >> 3) & 0x1), \
- 4 + (((C) >> 4) & 0x1), \
- 4 + (((C) >> 5) & 0x1), \
- 6 + (((C) >> 6) & 0x1), \
- 6 + (((C) >> 7) & 0x1))
+ (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))
#define _mm512_mask_permute_pd(W, U, X, C) \
(__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@@ -6324,24 +6315,7 @@ _mm_cvttss_u64 (__m128 __A)
(__v8df)_mm512_setzero_pd())
#define _mm512_permute_ps(X, C) \
- (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \
- (__v16sf)_mm512_undefined_ps(), \
- 0 + (((C) >> 0) & 0x3), \
- 0 + (((C) >> 2) & 0x3), \
- 0 + (((C) >> 4) & 0x3), \
- 0 + (((C) >> 6) & 0x3), \
- 4 + (((C) >> 0) & 0x3), \
- 4 + (((C) >> 2) & 0x3), \
- 4 + (((C) >> 4) & 0x3), \
- 4 + (((C) >> 6) & 0x3), \
- 8 + (((C) >> 0) & 0x3), \
- 8 + (((C) >> 2) & 0x3), \
- 8 + (((C) >> 4) & 0x3), \
- 8 + (((C) >> 6) & 0x3), \
- 12 + (((C) >> 0) & 0x3), \
- 12 + (((C) >> 2) & 0x3), \
- 12 + (((C) >> 4) & 0x3), \
- 12 + (((C) >> 6) & 0x3))
+ (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))
#define _mm512_mask_permute_ps(W, U, X, C) \
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index 7e3c51ffb69..7c85893ba13 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -998,9 +998,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_permute_pd(A, C) \
- (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \
- (__v2df)_mm_undefined_pd(), \
- ((C) >> 0) & 0x1, ((C) >> 1) & 0x1)
+ (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C))
/// Copies the values in a 256-bit vector of [4 x double] as specified by
/// the immediate integer operand.
@@ -1040,12 +1038,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_permute_pd(A, C) \
- (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
- (__v4df)_mm256_undefined_pd(), \
- 0 + (((C) >> 0) & 0x1), \
- 0 + (((C) >> 1) & 0x1), \
- 2 + (((C) >> 2) & 0x1), \
- 2 + (((C) >> 3) & 0x1))
+ (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C))
/// Copies the values in a 128-bit vector of [4 x float] as specified by
/// the immediate integer operand.
@@ -1101,10 +1094,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_permute_ps(A, C) \
- (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \
- (__v4sf)_mm_undefined_ps(), \
- ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
- ((C) >> 4) & 0x3, ((C) >> 6) & 0x3)
+ (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C))
/// Copies the values in a 256-bit vector of [8 x float] as specified by
/// the immediate integer operand.
@@ -1196,16 +1186,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c)
/// returned vector.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_permute_ps(A, C) \
- (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
- (__v8sf)_mm256_undefined_ps(), \
- 0 + (((C) >> 0) & 0x3), \
- 0 + (((C) >> 2) & 0x3), \
- 0 + (((C) >> 4) & 0x3), \
- 0 + (((C) >> 6) & 0x3), \
- 4 + (((C) >> 0) & 0x3), \
- 4 + (((C) >> 2) & 0x3), \
- 4 + (((C) >> 4) & 0x3), \
- 4 + (((C) >> 6) & 0x3))
+ (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C))
/// Permutes 128-bit data values stored in two 256-bit vectors of
/// [4 x double], as specified by the immediate integer operand.
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index db004607cc0..d16921c6c70 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2612,6 +2612,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vec_set_v2di:
i = 2; l = 0; u = 1;
break;
+ case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v4si:
case X86::BI__builtin_ia32_vec_ext_v4sf:
@@ -2676,6 +2677,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vec_set_v8si:
i = 2; l = 0; u = 7;
break;
+ case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_roundps:
case X86::BI__builtin_ia32_roundpd:
case X86::BI__builtin_ia32_roundps256:
@@ -2721,6 +2723,10 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vec_set_v32qi:
i = 2; l = 0; u = 31;
break;
+ case X86::BI__builtin_ia32_vpermilps:
+ case X86::BI__builtin_ia32_vpermilps256:
+ case X86::BI__builtin_ia32_vpermilpd512:
+ case X86::BI__builtin_ia32_vpermilps512:
case X86::BI__builtin_ia32_vcvtps2ph:
case X86::BI__builtin_ia32_vcvtps2ph_mask:
case X86::BI__builtin_ia32_vcvtps2ph256:
OpenPOWER on IntegriCloud