summary | refs | log | tree | commit | diff | stats
path: root/clang/lib
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2018-06-08 06:13:16 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-06-08 06:13:16 +0000
commit: 03de166ccd128a402f3bf12fa5b2ac06455331b5 (patch)
tree: fdfedb2c52d90386fe78265f4e4dea48f760f107 /clang/lib
parent: 573dab1553db626ed952a8e1c359d2237b3f8367 (diff)
download: bcm5719-llvm-03de166ccd128a402f3bf12fa5b2ac06455331b5.tar.gz
download: bcm5719-llvm-03de166ccd128a402f3bf12fa5b2ac06455331b5.zip
[X86] Add builtins for pshufd, pshuflw, and pshufhw to enable target feature and immediate range checking.
llvm-svn: 334265
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/CodeGen/CGBuiltin.cpp 51
-rw-r--r-- clang/lib/Headers/avx2intrin.h 37
-rw-r--r-- clang/lib/Headers/avx512bwintrin.h 46
-rw-r--r-- clang/lib/Headers/avx512fintrin.h 19
-rw-r--r-- clang/lib/Headers/emmintrin.h 19
-rw-r--r-- clang/lib/Sema/SemaChecking.cpp 9
6 files changed, 69 insertions, 112 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 025b34e809c..741f36b095d 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9325,6 +9325,57 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
makeArrayRef(Indices, NumElts),
"blend");
}
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+
+ // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[32];
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ for (unsigned i = 0; i != 4; ++i) {
+ Indices[l + i] = l + (Imm & 3);
+ Imm >>= 2;
+ }
+ for (unsigned i = 4; i != 8; ++i)
+ Indices[l + i] = l + i;
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "pshuflw");
+ }
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+
+ // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[32];
+ for (unsigned l = 0; l != NumElts; l += 8) {
+ for (unsigned i = 0; i != 4; ++i)
+ Indices[l + i] = l + i;
+ for (unsigned i = 4; i != 8; ++i) {
+ Indices[l + i] = l + 4 + (Imm & 3);
+ Imm >>= 2;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
+ makeArrayRef(Indices, NumElts),
+ "pshufhw");
+ }
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512:
case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilps:
case X86::BI__builtin_ia32_vpermilpd256:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 3867af08ccc..e7fdd2b1e59 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -488,44 +488,13 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b)
}
#define _mm256_shuffle_epi32(a, imm) \
- (__m256i)__builtin_shufflevector((__v8si)(__m256i)(a), \
- (__v8si)_mm256_undefined_si256(), \
- 0 + (((imm) >> 0) & 0x3), \
- 0 + (((imm) >> 2) & 0x3), \
- 0 + (((imm) >> 4) & 0x3), \
- 0 + (((imm) >> 6) & 0x3), \
- 4 + (((imm) >> 0) & 0x3), \
- 4 + (((imm) >> 2) & 0x3), \
- 4 + (((imm) >> 4) & 0x3), \
- 4 + (((imm) >> 6) & 0x3))
+ (__m256i)__builtin_ia32_pshufd256((__v8si)(__m256i)(a), (int)(imm))
#define _mm256_shufflehi_epi16(a, imm) \
- (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
- (__v16hi)_mm256_undefined_si256(), \
- 0, 1, 2, 3, \
- 4 + (((imm) >> 0) & 0x3), \
- 4 + (((imm) >> 2) & 0x3), \
- 4 + (((imm) >> 4) & 0x3), \
- 4 + (((imm) >> 6) & 0x3), \
- 8, 9, 10, 11, \
- 12 + (((imm) >> 0) & 0x3), \
- 12 + (((imm) >> 2) & 0x3), \
- 12 + (((imm) >> 4) & 0x3), \
- 12 + (((imm) >> 6) & 0x3))
+ (__m256i)__builtin_ia32_pshufhw256((__v16hi)(__m256i)(a), (int)(imm))
#define _mm256_shufflelo_epi16(a, imm) \
- (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(a), \
- (__v16hi)_mm256_undefined_si256(), \
- 0 + (((imm) >> 0) & 0x3), \
- 0 + (((imm) >> 2) & 0x3), \
- 0 + (((imm) >> 4) & 0x3), \
- 0 + (((imm) >> 6) & 0x3), \
- 4, 5, 6, 7, \
- 8 + (((imm) >> 0) & 0x3), \
- 8 + (((imm) >> 2) & 0x3), \
- 8 + (((imm) >> 4) & 0x3), \
- 8 + (((imm) >> 6) & 0x3), \
- 12, 13, 14, 15)
+ (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_sign_epi8(__m256i __a, __m256i __b)
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index aabaabd648d..0e21a57f311 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1287,28 +1287,7 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
#define _mm512_shufflehi_epi16(A, imm) \
- (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
- (__v32hi)_mm512_undefined_epi32(), \
- 0, 1, 2, 3, \
- 4 + (((imm) >> 0) & 0x3), \
- 4 + (((imm) >> 2) & 0x3), \
- 4 + (((imm) >> 4) & 0x3), \
- 4 + (((imm) >> 6) & 0x3), \
- 8, 9, 10, 11, \
- 12 + (((imm) >> 0) & 0x3), \
- 12 + (((imm) >> 2) & 0x3), \
- 12 + (((imm) >> 4) & 0x3), \
- 12 + (((imm) >> 6) & 0x3), \
- 16, 17, 18, 19, \
- 20 + (((imm) >> 0) & 0x3), \
- 20 + (((imm) >> 2) & 0x3), \
- 20 + (((imm) >> 4) & 0x3), \
- 20 + (((imm) >> 6) & 0x3), \
- 24, 25, 26, 27, \
- 28 + (((imm) >> 0) & 0x3), \
- 28 + (((imm) >> 2) & 0x3), \
- 28 + (((imm) >> 4) & 0x3), \
- 28 + (((imm) >> 6) & 0x3))
+ (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm))
#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \
(__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
@@ -1323,28 +1302,7 @@ _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
(__v32hi)_mm512_setzero_si512())
#define _mm512_shufflelo_epi16(A, imm) \
- (__m512i)__builtin_shufflevector((__v32hi)(__m512i)(A), \
- (__v32hi)_mm512_undefined_epi32(), \
- 0 + (((imm) >> 0) & 0x3), \
- 0 + (((imm) >> 2) & 0x3), \
- 0 + (((imm) >> 4) & 0x3), \
- 0 + (((imm) >> 6) & 0x3), \
- 4, 5, 6, 7, \
- 8 + (((imm) >> 0) & 0x3), \
- 8 + (((imm) >> 2) & 0x3), \
- 8 + (((imm) >> 4) & 0x3), \
- 8 + (((imm) >> 6) & 0x3), \
- 12, 13, 14, 15, \
- 16 + (((imm) >> 0) & 0x3), \
- 16 + (((imm) >> 2) & 0x3), \
- 16 + (((imm) >> 4) & 0x3), \
- 16 + (((imm) >> 6) & 0x3), \
- 20, 21, 22, 23, \
- 24 + (((imm) >> 0) & 0x3), \
- 24 + (((imm) >> 2) & 0x3), \
- 24 + (((imm) >> 4) & 0x3), \
- 24 + (((imm) >> 6) & 0x3), \
- 28, 29, 30, 31)
+ (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm))
#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 4ae235e6330..fe730ac27f2 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -8849,24 +8849,7 @@ _mm_maskz_load_sd (__mmask8 __U, const double* __A)
}
#define _mm512_shuffle_epi32(A, I) \
- (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
- (__v16si)_mm512_undefined_epi32(), \
- 0 + (((I) >> 0) & 0x3), \
- 0 + (((I) >> 2) & 0x3), \
- 0 + (((I) >> 4) & 0x3), \
- 0 + (((I) >> 6) & 0x3), \
- 4 + (((I) >> 0) & 0x3), \
- 4 + (((I) >> 2) & 0x3), \
- 4 + (((I) >> 4) & 0x3), \
- 4 + (((I) >> 6) & 0x3), \
- 8 + (((I) >> 0) & 0x3), \
- 8 + (((I) >> 2) & 0x3), \
- 8 + (((I) >> 4) & 0x3), \
- 8 + (((I) >> 6) & 0x3), \
- 12 + (((I) >> 0) & 0x3), \
- 12 + (((I) >> 2) & 0x3), \
- 12 + (((I) >> 4) & 0x3), \
- 12 + (((I) >> 6) & 0x3))
+ (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index ad836b1e637..8942790d5c5 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -4343,10 +4343,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [127:96] of \a a.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm) \
- (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
- (__v4si)_mm_undefined_si128(), \
- ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
- ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3)
+ (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))
/// Constructs a 128-bit integer vector by shuffling four lower 16-bit
/// elements of a 128-bit integer vector of [8 x i16], using the immediate
@@ -4376,11 +4373,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [63:48] of \a a. \n
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflelo_epi16(a, imm) \
- (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
- (__v8hi)_mm_undefined_si128(), \
- ((imm) >> 0) & 0x3, ((imm) >> 2) & 0x3, \
- ((imm) >> 4) & 0x3, ((imm) >> 6) & 0x3, \
- 4, 5, 6, 7)
+ (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))
/// Constructs a 128-bit integer vector by shuffling four upper 16-bit
/// elements of a 128-bit integer vector of [8 x i16], using the immediate
@@ -4410,13 +4403,7 @@ _mm_movemask_epi8(__m128i __a)
/// 11: assign values from bits [127:112] of \a a. \n
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm) \
- (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
- (__v8hi)_mm_undefined_si128(), \
- 0, 1, 2, 3, \
- 4 + (((imm) >> 0) & 0x3), \
- 4 + (((imm) >> 2) & 0x3), \
- 4 + (((imm) >> 4) & 0x3), \
- 4 + (((imm) >> 6) & 0x3))
+ (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))
/// Unpacks the high-order (index 8-15) values from two 128-bit vectors
/// of [16 x i8] and interleaves them into a 128-bit vector of [16 x i8].
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 4f5405935e2..020d82edf72 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2759,6 +2759,15 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vpermilps256:
case X86::BI__builtin_ia32_vpermilpd512:
case X86::BI__builtin_ia32_vpermilps512:
+ case X86::BI__builtin_ia32_pshufd:
+ case X86::BI__builtin_ia32_pshufd256:
+ case X86::BI__builtin_ia32_pshufd512:
+ case X86::BI__builtin_ia32_pshufhw:
+ case X86::BI__builtin_ia32_pshufhw256:
+ case X86::BI__builtin_ia32_pshufhw512:
+ case X86::BI__builtin_ia32_pshuflw:
+ case X86::BI__builtin_ia32_pshuflw256:
+ case X86::BI__builtin_ia32_pshuflw512:
case X86::BI__builtin_ia32_vcvtps2ph:
case X86::BI__builtin_ia32_vcvtps2ph_mask:
case X86::BI__builtin_ia32_vcvtps2ph256:
OpenPOWER on IntegriCloud