summary | refs | log | tree | commit | diff | stats
path: root/clang
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@gmail.com> 2016-07-02 05:36:43 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-07-02 05:36:43 +0000
commit: b3a4477b13b18bf4fef157b9a7b2f8862ac18104 (patch)
tree: d1758a08961184a38f016fca4ebcfc160d6b2fff /clang
parent: c835c12f6c4e6ca1b721f45ae27bf2385f0cf48a (diff)
download: bcm5719-llvm-b3a4477b13b18bf4fef157b9a7b2f8862ac18104.tar.gz
bcm5719-llvm-b3a4477b13b18bf4fef157b9a7b2f8862ac18104.zip
[X86] Replace 128-bit and 256-bit masked vpermilps/vpermilpd builtins with native IR.
llvm-svn: 274425
Diffstat (limited to 'clang')
-rw-r--r-- clang/include/clang/Basic/BuiltinsX86.def | 4
-rw-r--r-- clang/lib/Headers/avx512vlintrin.h | 47
-rw-r--r-- clang/lib/Sema/SemaChecking.cpp | 4
-rw-r--r-- clang/test/CodeGen/avx512vl-builtins.c | 40
4 files changed, 48 insertions, 47 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 0ed9563b7fd..e15462b86a0 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1800,10 +1800,6 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2vard512_maskz, "V16iV16iV16iV16iUs","","avx
TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_maskz, "V8dV8LLiV8dV8dUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_maskz, "V16fV16iV16fV16fUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_vpermt2varq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd_mask, "V2dV2dIiV2dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd256_mask, "V4dV4dIiV4dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermilps_mask, "V4fV4fIiV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermilps256_mask, "V8fV8fIiV8fUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilvarpd_mask, "V2dV2dV2LLiV2dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256_mask, "V4dV4dV4LLiV4dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpermilvarps_mask, "V4fV4fV4iV4fUc","","avx512vl")
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 6bef81c0960..e97ce7179c2 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -6700,43 +6700,44 @@ _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
}
#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
- (__m128d)__builtin_ia32_vpermilpd_mask((__v2df)(__m128d)(X), (int)(C), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U)); })
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_permute_pd((X), (C)), \
+ (__v2df)(__m128d)(W)); })
#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
- (__m128d)__builtin_ia32_vpermilpd_mask((__v2df)(__m128d)(X), (int)(C), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm_permute_pd((X), (C)), \
+ (__v2df)_mm_setzero_pd()); })
#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
- (__m256d)__builtin_ia32_vpermilpd256_mask((__v4df)(__m256d)(X), (int)(C), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U)); })
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permute_pd((X), (C)), \
+ (__v4df)(__m256d)(W)); })
#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
- (__m256d)__builtin_ia32_vpermilpd256_mask((__v4df)(__m256d)(X), (int)(C), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_permute_pd((X), (C)), \
+ (__v4df)_mm256_setzero_pd()); })
#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
- (__m128)__builtin_ia32_vpermilps_mask((__v4sf)(__m128)(X), (int)(C), \
- (__v4sf)(__m128)(W), (__mmask8)(U)); })
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_permute_ps((X), (C)), \
+ (__v4sf)(__m128)(W)); })
#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
- (__m128)__builtin_ia32_vpermilps_mask((__v4sf)(__m128)(X), (int)(C), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U)); })
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm_permute_ps((X), (C)), \
+ (__v4sf)_mm_setzero_ps()); })
#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
- (__m256)__builtin_ia32_vpermilps256_mask((__v8sf)(__m256)(X), (int)(C), \
- (__v8sf)(__m256)(W), \
- (__mmask8)(U)); })
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_permute_ps((X), (C)), \
+ (__v8sf)(__m256)(W)); })
#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
- (__m256)__builtin_ia32_vpermilps256_mask((__v8sf)(__m256)(X), (int)(C), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U)); })
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_permute_ps((X), (C)), \
+ (__v8sf)_mm256_setzero_ps()); })
static __inline__ __m128d __DEFAULT_FN_ATTRS
_mm_mask_permutevar_pd (__m128d __W, __mmask8 __U, __m128d __A,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 866c25d6acc..89e5e3e5af1 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -1388,7 +1388,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI_mm_prefetch:
case X86::BI__builtin_ia32_extractf32x4_mask:
case X86::BI__builtin_ia32_extracti32x4_mask:
- case X86::BI__builtin_ia32_vpermilpd_mask:
case X86::BI__builtin_ia32_extractf64x2_512_mask:
case X86::BI__builtin_ia32_extracti64x2_512_mask:
i = 1; l = 0; u = 3;
@@ -1459,7 +1458,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_roundpd:
case X86::BI__builtin_ia32_roundps256:
case X86::BI__builtin_ia32_roundpd256:
- case X86::BI__builtin_ia32_vpermilpd256_mask:
i = 1; l = 0; u = 15;
break;
case X86::BI__builtin_ia32_roundss:
@@ -1563,8 +1561,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_fpclasspd512_mask:
case X86::BI__builtin_ia32_fpclasssd_mask:
case X86::BI__builtin_ia32_fpclassss_mask:
- case X86::BI__builtin_ia32_vpermilps_mask:
- case X86::BI__builtin_ia32_vpermilps256_mask:
case X86::BI__builtin_ia32_vpermilps512_mask:
i = 1; l = 0; u = 255;
break;
diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c
index 340a9d01002..33fcae285e9 100644
--- a/clang/test/CodeGen/avx512vl-builtins.c
+++ b/clang/test/CodeGen/avx512vl-builtins.c
@@ -4610,50 +4610,58 @@ __m256 test_mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A) {
__m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) {
// CHECK-LABEL: @test_mm_mask_permute_pd
- // CHECK: @llvm.x86.avx512.mask.vpermil.pd
- return _mm_mask_permute_pd(__W, __U, __X, 2);
+ // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
+ return _mm_mask_permute_pd(__W, __U, __X, 1);
}
__m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) {
// CHECK-LABEL: @test_mm_maskz_permute_pd
- // CHECK: @llvm.x86.avx512.mask.vpermil.pd
- return _mm_maskz_permute_pd(__U, __X, 2);
+ // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 0>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
+ return _mm_maskz_permute_pd(__U, __X, 1);
}
__m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) {
// CHECK-LABEL: @test_mm256_mask_permute_pd
- // CHECK: @llvm.x86.avx512.mask.vpermil.pd.256
- return _mm256_mask_permute_pd(__W, __U, __X, 2);
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+ return _mm256_mask_permute_pd(__W, __U, __X, 5);
}
__m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) {
// CHECK-LABEL: @test_mm256_maskz_permute_pd
- // CHECK: @llvm.x86.avx512.mask.vpermil.pd.256
- return _mm256_maskz_permute_pd(__U, __X, 2);
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+ return _mm256_maskz_permute_pd(__U, __X, 5);
}
__m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) {
// CHECK-LABEL: @test_mm_mask_permute_ps
- // CHECK: @llvm.x86.avx512.mask.vpermil.ps
- return _mm_mask_permute_ps(__W, __U, __X, 2);
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ return _mm_mask_permute_ps(__W, __U, __X, 0x1b);
}
__m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) {
// CHECK-LABEL: @test_mm_maskz_permute_ps
- // CHECK: @llvm.x86.avx512.mask.vpermil.ps
- return _mm_maskz_permute_ps(__U, __X, 2);
+ // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ return _mm_maskz_permute_ps(__U, __X, 0x1b);
}
__m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) {
// CHECK-LABEL: @test_mm256_mask_permute_ps
- // CHECK: @llvm.x86.avx512.mask.vpermil.ps.256
- return _mm256_mask_permute_ps(__W, __U, __X, 2);
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+ return _mm256_mask_permute_ps(__W, __U, __X, 0x1b);
}
__m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) {
// CHECK-LABEL: @test_mm256_maskz_permute_ps
- // CHECK: @llvm.x86.avx512.mask.vpermil.ps.256
- return _mm256_maskz_permute_ps(__U, __X, 2);
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
+ return _mm256_maskz_permute_ps(__U, __X, 0x1b);
}
__m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
OpenPOWER on IntegriCloud