diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 20 | ||||
-rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 32 | ||||
-rw-r--r-- | clang/lib/Headers/avx512vlintrin.h | 34 | ||||
-rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 6 |
4 files changed, 38 insertions, 54 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a126e75b19f..c911ab9fc5b 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9222,6 +9222,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, makeArrayRef(Indices, NumElts), "palignr"); } + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: + case X86::BI__builtin_ia32_alignq512: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + + // Mask the shift amount to width of two vectors. + ShiftVal &= (2 * NumElts) - 1; + + uint32_t Indices[16]; + for (unsigned i = 0; i != NumElts; ++i) + Indices[i] = i + ShiftVal; + + return Builder.CreateShuffleVector(Ops[1], Ops[0], + makeArrayRef(Indices, NumElts), + "valign"); + } case X86::BI__builtin_ia32_vperm2f128_pd256: case X86::BI__builtin_ia32_vperm2f128_ps256: diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 47a0c9d433f..5132edde65e 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -3465,16 +3465,8 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, } #define _mm512_alignr_epi64(A, B, I) \ - (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \ - (__v8di)(__m512i)(A), \ - ((int)(I) & 0x7) + 0, \ - ((int)(I) & 0x7) + 1, \ - ((int)(I) & 0x7) + 2, \ - ((int)(I) & 0x7) + 3, \ - ((int)(I) & 0x7) + 4, \ - ((int)(I) & 0x7) + 5, \ - ((int)(I) & 0x7) + 6, \ - ((int)(I) & 0x7) + 7) + (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(I)) #define _mm512_mask_alignr_epi64(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ @@ -3487,24 +3479,8 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, (__v8di)_mm512_setzero_si512()) #define _mm512_alignr_epi32(A, B, I) \ - (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \ - (__v16si)(__m512i)(A), \ - ((int)(I) & 0xf) + 0, \ - ((int)(I) & 0xf) + 1, \ - ((int)(I) & 0xf) + 2, \ - ((int)(I) & 0xf) + 3, \ - ((int)(I) & 0xf) + 4, \ - ((int)(I) & 0xf) + 5, \ - ((int)(I) & 0xf) + 6, \ - ((int)(I) & 0xf) + 7, \ - ((int)(I) & 0xf) + 8, \ - ((int)(I) & 0xf) + 9, \ - ((int)(I) & 0xf) + 10, \ - ((int)(I) & 0xf) + 11, \ - ((int)(I) & 0xf) + 12, \ - ((int)(I) & 0xf) + 13, \ - ((int)(I) & 0xf) + 14, \ - ((int)(I) & 0xf) + 15) + (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \ + (__v16si)(__m512i)(B), (int)(I)) #define _mm512_mask_alignr_epi32(W, U, A, B, imm) \ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 2b9f14b99f3..e5963720fe1 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -8082,12 +8082,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) } #define _mm_alignr_epi32(A, B, imm) \ - (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \ - (__v4si)(__m128i)(A), \ - ((int)(imm) & 0x3) + 0, \ - ((int)(imm) & 0x3) + 1, \ - ((int)(imm) & 0x3) + 2, \ - ((int)(imm) & 0x3) + 3) + (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ + (__v4si)(__m128i)(B), (int)(imm)) #define _mm_mask_alignr_epi32(W, U, A, B, imm) \ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ @@ -8100,16 +8096,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v4si)_mm_setzero_si128()) #define _mm256_alignr_epi32(A, B, imm) \ - (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \ - (__v8si)(__m256i)(A), \ - ((int)(imm) & 0x7) + 0, \ - ((int)(imm) & 0x7) + 1, \ - ((int)(imm) & 0x7) + 2, \ - ((int)(imm) & 0x7) + 3, \ - ((int)(imm) & 0x7) + 4, \ - ((int)(imm) & 0x7) + 5, \ - ((int)(imm) & 0x7) + 6, \ - ((int)(imm) & 0x7) + 7) + (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ + (__v8si)(__m256i)(B), (int)(imm)) #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ @@ -8122,10 +8110,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v8si)_mm256_setzero_si256()) #define _mm_alignr_epi64(A, B, imm) \ - (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \ - (__v2di)(__m128i)(A), \ - ((int)(imm) & 0x1) + 0, \ - ((int)(imm) & 0x1) + 1) + (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(imm)) #define _mm_mask_alignr_epi64(W, U, A, B, imm) \ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ @@ -8138,12 +8124,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v2di)_mm_setzero_si128()) #define _mm256_alignr_epi64(A, B, imm) \ - (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \ - (__v4di)(__m256i)(A), \ - ((int)(imm) & 0x3) + 0, \ - ((int)(imm) & 0x3) + 1, \ - ((int)(imm) & 0x3) + 2, \ - ((int)(imm) & 0x3) + 3) + (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(imm)) #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 0c5be0506e0..419ac47b9b8 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2712,6 +2712,12 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512: + case X86::BI__builtin_ia32_alignq512: + case X86::BI__builtin_ia32_alignd512: + case X86::BI__builtin_ia32_alignd128: + case X86::BI__builtin_ia32_alignd256: + case X86::BI__builtin_ia32_alignq128: + case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_vcomisd: case X86::BI__builtin_ia32_vcomiss: case X86::BI__builtin_ia32_dbpsadbw128_mask: |