diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-06-08 00:59:27 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-06-08 00:59:27 +0000 |
| commit | acf56019614b9092b596b7fff5995353b193d26f (patch) | |
| tree | 7bbea90c7901e077c07d40e0c4d185a695744ee5 /clang/lib | |
| parent | a2670df602c58117a5b4cea65d175eef176cd604 (diff) | |
| download | bcm5719-llvm-acf56019614b9092b596b7fff5995353b193d26f.tar.gz bcm5719-llvm-acf56019614b9092b596b7fff5995353b193d26f.zip | |
[X86] Add builtins for vpermilps/pd instructions to enable target feature checking.
llvm-svn: 334256
Diffstat (limited to 'clang/lib')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 27 | ||||
| -rw-r--r-- | clang/lib/Headers/avx512fintrin.h | 30 | ||||
| -rw-r--r-- | clang/lib/Headers/avxintrin.h | 27 | ||||
| -rw-r--r-- | clang/lib/Sema/SemaChecking.cpp | 6 |
4 files changed, 39 insertions, 51 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 00020584955..4331005cb35 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9256,6 +9256,33 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, makeArrayRef(Indices, NumElts), "blend"); } + case X86::BI__builtin_ia32_vpermilpd: + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilpd256: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilpd512: + case X86::BI__builtin_ia32_vpermilps512: { + uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + llvm::Type *Ty = Ops[0]->getType(); + unsigned NumElts = Ty->getVectorNumElements(); + unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; + unsigned NumLaneElts = NumElts / NumLanes; + + // Splat the 8-bits of immediate 4 times to help the loop wrap around. + Imm = (Imm & 0xff) * 0x01010101; + + uint32_t Indices[16]; + for (unsigned l = 0; l != NumElts; l += NumLaneElts) { + for (unsigned i = 0; i != NumLaneElts; ++i) { + Indices[i + l] = (Imm % NumLaneElts) + l; + Imm /= NumLaneElts; + } + } + + return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty), + makeArrayRef(Indices, NumElts), + "permil"); + } case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512: { diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 074bf61c7ba..ccc445a6acd 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -6302,16 +6302,7 @@ _mm_cvttss_u64 (__m128 __A) #endif #define _mm512_permute_pd(X, C) \ - (__m512d)__builtin_shufflevector((__v8df)(__m512d)(X), \ - (__v8df)_mm512_undefined_pd(), \ - 0 + (((C) >> 0) & 0x1), \ - 0 + (((C) >> 1) & 0x1), \ - 2 + (((C) >> 2) & 0x1), \ - 2 + (((C) >> 3) & 0x1), \ - 4 + (((C) >> 4) & 0x1), \ - 4 + (((C) >> 5) & 0x1), \ - 6 + (((C) >> 6) & 0x1), \ - 6 + (((C) >> 7) & 0x1)) + (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)) #define _mm512_mask_permute_pd(W, U, X, C) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ @@ -6324,24 +6315,7 @@ _mm_cvttss_u64 (__m128 __A) (__v8df)_mm512_setzero_pd()) #define _mm512_permute_ps(X, C) \ - (__m512)__builtin_shufflevector((__v16sf)(__m512)(X), \ - (__v16sf)_mm512_undefined_ps(), \ - 0 + (((C) >> 0) & 0x3), \ - 0 + (((C) >> 2) & 0x3), \ - 0 + (((C) >> 4) & 0x3), \ - 0 + (((C) >> 6) & 0x3), \ - 4 + (((C) >> 0) & 0x3), \ - 4 + (((C) >> 2) & 0x3), \ - 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3), \ - 8 + (((C) >> 0) & 0x3), \ - 8 + (((C) >> 2) & 0x3), \ - 8 + (((C) >> 4) & 0x3), \ - 8 + (((C) >> 6) & 0x3), \ - 12 + (((C) >> 0) & 0x3), \ - 12 + (((C) >> 2) & 0x3), \ - 12 + (((C) >> 4) & 0x3), \ - 12 + (((C) >> 6) & 0x3)) + (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)) #define _mm512_mask_permute_ps(W, U, X, C) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 7e3c51ffb69..7c85893ba13 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -998,9 +998,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. #define _mm_permute_pd(A, C) \ - (__m128d)__builtin_shufflevector((__v2df)(__m128d)(A), \ - (__v2df)_mm_undefined_pd(), \ - ((C) >> 0) & 0x1, ((C) >> 1) & 0x1) + (__m128d)__builtin_ia32_vpermilpd((__v2df)(__m128d)(A), (int)(C)) /// Copies the values in a 256-bit vector of [4 x double] as specified by /// the immediate integer operand. @@ -1040,12 +1038,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// returned vector. /// \returns A 256-bit vector of [4 x double] containing the copied values. #define _mm256_permute_pd(A, C) \ - (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \ - (__v4df)_mm256_undefined_pd(), \ - 0 + (((C) >> 0) & 0x1), \ - 0 + (((C) >> 1) & 0x1), \ - 2 + (((C) >> 2) & 0x1), \ - 2 + (((C) >> 3) & 0x1)) + (__m256d)__builtin_ia32_vpermilpd256((__v4df)(__m256d)(A), (int)(C)) /// Copies the values in a 128-bit vector of [4 x float] as specified by /// the immediate integer operand. @@ -1101,10 +1094,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. #define _mm_permute_ps(A, C) \ - (__m128)__builtin_shufflevector((__v4sf)(__m128)(A), \ - (__v4sf)_mm_undefined_ps(), \ - ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \ - ((C) >> 4) & 0x3, ((C) >> 6) & 0x3) + (__m128)__builtin_ia32_vpermilps((__v4sf)(__m128)(A), (int)(C)) /// Copies the values in a 256-bit vector of [8 x float] as specified by /// the immediate integer operand. @@ -1196,16 +1186,7 @@ _mm256_permutevar_ps(__m256 __a, __m256i __c) /// returned vector. /// \returns A 256-bit vector of [8 x float] containing the copied values. #define _mm256_permute_ps(A, C) \ - (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \ - (__v8sf)_mm256_undefined_ps(), \ - 0 + (((C) >> 0) & 0x3), \ - 0 + (((C) >> 2) & 0x3), \ - 0 + (((C) >> 4) & 0x3), \ - 0 + (((C) >> 6) & 0x3), \ - 4 + (((C) >> 0) & 0x3), \ - 4 + (((C) >> 2) & 0x3), \ - 4 + (((C) >> 4) & 0x3), \ - 4 + (((C) >> 6) & 0x3)) + (__m256)__builtin_ia32_vpermilps256((__v8sf)(__m256)(A), (int)(C)) /// Permutes 128-bit data values stored in two 256-bit vectors of /// [4 x double], as specified by the immediate integer operand. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index db004607cc0..d16921c6c70 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2612,6 +2612,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vec_set_v2di: i = 2; l = 0; u = 1; break; + case X86::BI__builtin_ia32_vpermilpd: case X86::BI__builtin_ia32_vec_ext_v4hi: case X86::BI__builtin_ia32_vec_ext_v4si: case X86::BI__builtin_ia32_vec_ext_v4sf: @@ -2676,6 +2677,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vec_set_v8si: i = 2; l = 0; u = 7; break; + case X86::BI__builtin_ia32_vpermilpd256: case X86::BI__builtin_ia32_roundps: case X86::BI__builtin_ia32_roundpd: case X86::BI__builtin_ia32_roundps256: @@ -2721,6 +2723,10 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_vec_set_v32qi: i = 2; l = 0; u = 31; break; + case X86::BI__builtin_ia32_vpermilps: + case X86::BI__builtin_ia32_vpermilps256: + case X86::BI__builtin_ia32_vpermilpd512: + case X86::BI__builtin_ia32_vpermilps512: case X86::BI__builtin_ia32_vcvtps2ph: case X86::BI__builtin_ia32_vcvtps2ph_mask: case X86::BI__builtin_ia32_vcvtps2ph256: |

