summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r-- clang/lib/CodeGen/CGBuiltin.cpp | 20
-rw-r--r-- clang/lib/Headers/avx512fintrin.h | 32
-rw-r--r-- clang/lib/Headers/avx512vlintrin.h | 34
-rw-r--r-- clang/lib/Sema/SemaChecking.cpp | 6
4 files changed, 38 insertions, 54 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a126e75b19f..c911ab9fc5b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9222,6 +9222,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
makeArrayRef(Indices, NumElts),
"palignr");
}
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
+ case X86::BI__builtin_ia32_alignq512: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+
+ // Mask the shift amount to the width of one vector, as the replaced header
+ // macros did; otherwise i + ShiftVal could exceed the 2 * NumElts inputs.
+ ShiftVal &= NumElts - 1;
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + ShiftVal;
+
+ return Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "valign");
+ }
case X86::BI__builtin_ia32_vperm2f128_pd256:
case X86::BI__builtin_ia32_vperm2f128_ps256:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 47a0c9d433f..5132edde65e 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -3465,16 +3465,8 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
}
#define _mm512_alignr_epi64(A, B, I) \
- (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
- (__v8di)(__m512i)(A), \
- ((int)(I) & 0x7) + 0, \
- ((int)(I) & 0x7) + 1, \
- ((int)(I) & 0x7) + 2, \
- ((int)(I) & 0x7) + 3, \
- ((int)(I) & 0x7) + 4, \
- ((int)(I) & 0x7) + 5, \
- ((int)(I) & 0x7) + 6, \
- ((int)(I) & 0x7) + 7)
+ (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(I))
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
@@ -3487,24 +3479,8 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
(__v8di)_mm512_setzero_si512())
#define _mm512_alignr_epi32(A, B, I) \
- (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
- (__v16si)(__m512i)(A), \
- ((int)(I) & 0xf) + 0, \
- ((int)(I) & 0xf) + 1, \
- ((int)(I) & 0xf) + 2, \
- ((int)(I) & 0xf) + 3, \
- ((int)(I) & 0xf) + 4, \
- ((int)(I) & 0xf) + 5, \
- ((int)(I) & 0xf) + 6, \
- ((int)(I) & 0xf) + 7, \
- ((int)(I) & 0xf) + 8, \
- ((int)(I) & 0xf) + 9, \
- ((int)(I) & 0xf) + 10, \
- ((int)(I) & 0xf) + 11, \
- ((int)(I) & 0xf) + 12, \
- ((int)(I) & 0xf) + 13, \
- ((int)(I) & 0xf) + 14, \
- ((int)(I) & 0xf) + 15)
+ (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(I))
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 2b9f14b99f3..e5963720fe1 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -8082,12 +8082,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
}
#define _mm_alignr_epi32(A, B, imm) \
- (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \
- (__v4si)(__m128i)(A), \
- ((int)(imm) & 0x3) + 0, \
- ((int)(imm) & 0x3) + 1, \
- ((int)(imm) & 0x3) + 2, \
- ((int)(imm) & 0x3) + 3)
+ (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm))
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
(__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
@@ -8100,16 +8096,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
(__v4si)_mm_setzero_si128())
#define _mm256_alignr_epi32(A, B, imm) \
- (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \
- (__v8si)(__m256i)(A), \
- ((int)(imm) & 0x7) + 0, \
- ((int)(imm) & 0x7) + 1, \
- ((int)(imm) & 0x7) + 2, \
- ((int)(imm) & 0x7) + 3, \
- ((int)(imm) & 0x7) + 4, \
- ((int)(imm) & 0x7) + 5, \
- ((int)(imm) & 0x7) + 6, \
- ((int)(imm) & 0x7) + 7)
+ (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm))
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
@@ -8122,10 +8110,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
(__v8si)_mm256_setzero_si256())
#define _mm_alignr_epi64(A, B, imm) \
- (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \
- (__v2di)(__m128i)(A), \
- ((int)(imm) & 0x1) + 0, \
- ((int)(imm) & 0x1) + 1)
+ (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm))
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
(__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
@@ -8138,12 +8124,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
(__v2di)_mm_setzero_si128())
#define _mm256_alignr_epi64(A, B, imm) \
- (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \
- (__v4di)(__m256i)(A), \
- ((int)(imm) & 0x3) + 0, \
- ((int)(imm) & 0x3) + 1, \
- ((int)(imm) & 0x3) + 2, \
- ((int)(imm) & 0x3) + 3)
+ (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm))
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0c5be0506e0..419ac47b9b8 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2712,6 +2712,12 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512:
+ case X86::BI__builtin_ia32_alignq512:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_vcomisd:
case X86::BI__builtin_ia32_vcomiss:
case X86::BI__builtin_ia32_dbpsadbw128_mask:
OpenPOWER on IntegriCloud