summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Zuckerman <Michael.zuckerman@intel.com>2016-07-05 08:08:01 +0000
committerMichael Zuckerman <Michael.zuckerman@intel.com>2016-07-05 08:08:01 +0000
commit7dac6fbdf8cd7e08a879c71ae2e0ceab140f8cc5 (patch)
tree04cbda0a44176eb319a7625fc7bbaf0c5d6b4207
parent6ecaec83ba01705c2e0f4afa37029a8a294c3cd2 (diff)
downloadbcm5719-llvm-7dac6fbdf8cd7e08a879c71ae2e0ceab140f8cc5.tar.gz
bcm5719-llvm-7dac6fbdf8cd7e08a879c71ae2e0ceab140f8cc5.zip
[Clang][BuiltIn][AVX512] adding _mm{|256|512}_mask_cvt{s|us|}epi16_storeu_epi8 intrinsics
Differential Revision: http://reviews.llvm.org/D21729 llvm-svn: 274532
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def9
-rw-r--r--clang/lib/Headers/avx512bwintrin.h18
-rw-r--r--clang/lib/Headers/avx512vlbwintrin.h36
-rw-r--r--clang/test/CodeGen/avx512bw-builtins.c20
-rw-r--r--clang/test/CodeGen/avx512vlbw-builtins.c42
5 files changed, 125 insertions, 0 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 8a33783988f..4686603891a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1945,6 +1945,7 @@ TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512b
TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc","","avx512f")
@@ -1955,8 +1956,10 @@ TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc","","avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs","","avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc","","avx512vl")
@@ -1975,6 +1978,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc","","avx512f")
@@ -1985,8 +1989,10 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc","","avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs","","avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc","","avx512vl")
@@ -2005,6 +2011,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi","","avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc","","avx512f")
@@ -2014,9 +2021,11 @@ TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc","","avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc","","avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs","","avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc","","avx512vl")
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 1d4e7f17410..e3caa404fdf 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1394,6 +1394,24 @@ _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
__M);
}
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
+}
+
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 713c8716b85..ad8b8c14068 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -1999,6 +1999,25 @@ _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
__M);
}
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
+}
+
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
+}
+
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm256_cvtepi16_epi8 (__m256i __A) {
return (__m128i) __builtin_ia32_pmovwb256_mask ((__v16hi) __A,
@@ -2020,6 +2039,23 @@ _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
__M);
}
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
+{
+ __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
+{
+ __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
+{
+ __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
+}
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mask_mulhrs_epi16 (__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
return (__m128i) __builtin_ia32_pmulhrsw128_mask ((__v8hi) __X,
diff --git a/clang/test/CodeGen/avx512bw-builtins.c b/clang/test/CodeGen/avx512bw-builtins.c
index 7d73ce5828e..75efcbaa754 100644
--- a/clang/test/CodeGen/avx512bw-builtins.c
+++ b/clang/test/CodeGen/avx512bw-builtins.c
@@ -1590,3 +1590,23 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) {
return _mm512_movepi16_mask(__A);
}
+void test_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_mask_cvtepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmov.wb.mem.512
+ __builtin_ia32_pmovwb512mem_mask ( __P, __A, __M);
+}
+
+void test_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_mask_cvtsepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.512
+ __builtin_ia32_pmovswb512mem_mask ( __P, __A, __M);
+}
+
+void test_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
+{
+ // CHECK-LABEL: @test_mm512_mask_cvtusepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmovus.wb.mem.512
+ __builtin_ia32_pmovuswb512mem_mask ( __P, __A, __M);
+}
diff --git a/clang/test/CodeGen/avx512vlbw-builtins.c b/clang/test/CodeGen/avx512vlbw-builtins.c
index bab8b5a41ca..6779b7275d3 100644
--- a/clang/test/CodeGen/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/avx512vlbw-builtins.c
@@ -2518,3 +2518,45 @@ __m256i test_mm256_maskz_shufflelo_epi16(__mmask32 __U, __m256i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_shufflelo_epi16(__U, __A, 5);
}
+
+void test_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ // CHECK-LABEL:@test_mm_mask_cvtepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmov.wb.mem.128
+ _mm_mask_cvtepi16_storeu_epi8 (__P, __M, __A);
+}
+
+void test_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ // CHECK-LABEL:@test_mm_mask_cvtsepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.128
+ _mm_mask_cvtsepi16_storeu_epi8 ( __P, __M, __A);
+}
+
+void test_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
+{
+ // CHECK-LABEL:@test_mm_mask_cvtusepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmovus.wb.mem.128
+ _mm_mask_cvtusepi16_storeu_epi8 (__P, __M, __A);
+}
+
+void test_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
+{
+ // CHECK-LABEL:@test_mm256_mask_cvtusepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmovus.wb.mem.256
+ _mm256_mask_cvtusepi16_storeu_epi8 ( __P, __M, __A);
+}
+
+void test_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
+{
+ // CHECK-LABEL:@test_mm256_mask_cvtepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmov.wb.mem.256
+ _mm256_mask_cvtepi16_storeu_epi8 ( __P, __M, __A);
+}
+
+void test_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
+{
+ // CHECK-LABEL:@test_mm256_mask_cvtsepi16_storeu_epi8
+ // CHECK: @llvm.x86.avx512.mask.pmovs.wb.mem.256
+ _mm256_mask_cvtsepi16_storeu_epi8 ( __P, __M, __A);
+}
OpenPOWER on IntegriCloud