summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def2
-rw-r--r--clang/lib/Headers/avx512vlintrin.h57
-rw-r--r--clang/test/CodeGen/avx512vl-builtins.c15
3 files changed, 28 insertions, 46 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 74c0c039c9d..bb53628e911 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1799,8 +1799,6 @@ TARGET_BUILTIN(__builtin_ia32_permvarhi128_mask, "V8sV8sV8sV8sUc","","avx512bw,a
TARGET_BUILTIN(__builtin_ia32_permvarhi256_mask, "V16sV16sV16sV16sUs","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_permvardf256_mask, "V4dV4dV4LLiV4dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_permvardi256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarsf256_mask, "V8fV8fV8iV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarsi256_mask, "V8iV8iV8iV8iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc","","avx512dq,avx512vl")
TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc","","avx512dq,avx512vl")
TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc","","avx512dq,avx512vl")
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index fb8056e3f8d..e6d4cea56d2 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -8178,60 +8178,41 @@ _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
__M);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X,
- __m256 __Y)
-{
- return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
- (__v8si) __X,
- (__v8sf) __W,
- (__mmask8) __U);
-}
+#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y)
+_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
{
- return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
- (__v8si) __X,
- (__v8sf) _mm256_setzero_ps (),
- (__mmask8) __U);
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_permutexvar_ps(__X, __Y),
+ (__v8sf)__W);
}
static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_permutexvar_ps (__m256i __X, __m256 __Y)
+_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
{
- return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y,
- (__v8si) __X,
- (__v8sf) _mm256_undefined_si256 (),
- (__mmask8) -1);
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_permutexvar_ps(__X, __Y),
+ (__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y)
-{
- return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
- (__v8si) __X,
- (__v8si) _mm256_setzero_si256 (),
- __M);
-}
+#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
- __m256i __Y)
+_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
+ __m256i __Y)
{
- return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
- (__v8si) __X,
- (__v8si) __W,
- (__mmask8) __M);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+ (__v8si)_mm256_permutexvar_epi32(__X, __Y),
+ (__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
-_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
+_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
{
- return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
- (__v8si) __X,
- (__v8si) _mm256_undefined_si256(),
- (__mmask8) -1);
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
+ (__v8si)_mm256_permutexvar_epi32(__X, __Y),
+ (__v8si)_mm256_setzero_si256());
}
#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
diff --git a/clang/test/CodeGen/avx512vl-builtins.c b/clang/test/CodeGen/avx512vl-builtins.c
index 008eb09b8b1..00a0813801a 100644
--- a/clang/test/CodeGen/avx512vl-builtins.c
+++ b/clang/test/CodeGen/avx512vl-builtins.c
@@ -7019,37 +7019,40 @@ __m256i test_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X
__m256 test_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) {
// CHECK-LABEL: @test_mm256_mask_permutexvar_ps
- // CHECK: @llvm.x86.avx512.mask.permvar.sf.256
+ // CHECK: @llvm.x86.avx2.permps
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_permutexvar_ps(__W, __U, __X, __Y);
}
__m256 test_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) {
// CHECK-LABEL: @test_mm256_maskz_permutexvar_ps
- // CHECK: @llvm.x86.avx512.mask.permvar.sf.256
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_permutexvar_ps(__U, __X, __Y);
}
__m256 test_mm256_permutexvar_ps(__m256i __X, __m256 __Y) {
// CHECK-LABEL: @test_mm256_permutexvar_ps
- // CHECK: @llvm.x86.avx512.mask.permvar.sf.256
+ // CHECK: @llvm.x86.avx2.permps
return _mm256_permutexvar_ps( __X, __Y);
}
__m256i test_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: @test_mm256_maskz_permutexvar_epi32
- // CHECK: @llvm.x86.avx512.mask.permvar.si.256
+ // CHECK: @llvm.x86.avx2.permd
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_permutexvar_epi32(__M, __X, __Y);
}
__m256i test_mm256_permutexvar_epi32(__m256i __X, __m256i __Y) {
// CHECK-LABEL: @test_mm256_permutexvar_epi32
- // CHECK: @llvm.x86.avx512.mask.permvar.si.256
+ // CHECK: @llvm.x86.avx2.permd
return _mm256_permutexvar_epi32(__X, __Y);
}
__m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
// CHECK-LABEL: @test_mm256_mask_permutexvar_epi32
- // CHECK: @llvm.x86.avx512.mask.permvar.si.256
+ // CHECK: @llvm.x86.avx2.permd
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y);
}
OpenPOWER on IntegriCloud