summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-06-08 21:50:08 +0000
committerCraig Topper <craig.topper@intel.com>2018-06-08 21:50:08 +0000
commit88097d93555ef61cd43b5532870cf5d2f6564f5c (patch)
tree14f3aeb910a4467c8318b44cb0f14b3ffa1713bb /clang
parent5f50f338064295d9b3f9b5d9ed73f4601d2e226f (diff)
downloadbcm5719-llvm-88097d93555ef61cd43b5532870cf5d2f6564f5c.tar.gz
bcm5719-llvm-88097d93555ef61cd43b5532870cf5d2f6564f5c.zip
[X86] Add back some masked vector truncate builtins. Custom IRgen a a few others.
I'd like to make the select builtins require an avx512f, avx512bw, or avx512vl fature to match what is normally required to get masking. Truncate is special in that there are instructions with a 128/256-bit masked result even without avx512vl. By using special buitlins we can emit a select without using the 128/256-bit select builtins. llvm-svn: 334331
Diffstat (limited to 'clang')
-rw-r--r--clang/include/clang/Basic/BuiltinsX86.def2
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp29
-rw-r--r--clang/lib/Headers/avx512bwintrin.h16
-rw-r--r--clang/lib/Headers/avx512fintrin.h27
4 files changed, 57 insertions, 17 deletions
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index b694059adca..24c15530047 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1298,6 +1298,7 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "nc", "av
TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq")
TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
@@ -1648,6 +1649,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 012428dbb28..9106df0f1c6 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9309,6 +9309,35 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
makeArrayRef(Indices, DstNumElts),
"insert");
}
+ case X86::BI__builtin_ia32_pmovqd512_mask:
+ case X86::BI__builtin_ia32_pmovwb512_mask: {
+ Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
+ return EmitX86Select(*this, Ops[2], Res, Ops[1]);
+ }
+ case X86::BI__builtin_ia32_pmovdb512_mask:
+ case X86::BI__builtin_ia32_pmovdw512_mask:
+ case X86::BI__builtin_ia32_pmovqw512_mask: {
+ if (const auto *C = dyn_cast<Constant>(Ops[2]))
+ if (C->isAllOnesValue())
+ return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
+
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ default: llvm_unreachable("Unsupported intrinsic!");
+ case X86::BI__builtin_ia32_pmovdb512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_db_512;
+ break;
+ case X86::BI__builtin_ia32_pmovdw512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
+ break;
+ case X86::BI__builtin_ia32_pmovqw512_mask:
+ IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
+ break;
+ }
+
+ Function *Intr = CGM.getIntrinsic(IID);
+ return Builder.CreateCall(Intr, Ops);
+ }
case X86::BI__builtin_ia32_pblendw128:
case X86::BI__builtin_ia32_blendpd:
case X86::BI__builtin_ia32_blendps:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 0e21a57f311..2590ec71549 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1080,21 +1080,23 @@ _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi16_epi8 (__m512i __A) {
- return (__m256i)__builtin_convertvector((__v32hi)__A, __v32qi);
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi) _mm256_undefined_si256(),
+ (__mmask32) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
- return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
- (__v32qi)_mm512_cvtepi16_epi8(__A),
- (__v32qi)__O);
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi) __O,
+ __M);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
- return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
- (__v32qi)_mm512_cvtepi16_epi8(__A),
- (__v32qi)_mm256_setzero_si256());
+ return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+ (__v32qi) _mm256_setzero_si256(),
+ __M);
}
static __inline__ void __DEFAULT_FN_ATTRS
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index dbac414fff9..9daa559bb13 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -7402,7 +7402,9 @@ _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi8 (__m512i __A)
{
- return (__m128i)__builtin_convertvector((__v16si)__A, __v16qi);
+ return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+ (__v16qi) _mm_undefined_si128 (),
+ (__mmask16) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7429,7 +7431,9 @@ _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi32_epi16 (__m512i __A)
{
- return (__m256i)__builtin_convertvector((__v16si)__A, __v16hi);
+ return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+ (__v16hi) _mm256_undefined_si256 (),
+ (__mmask16) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -7485,23 +7489,24 @@ _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi32 (__m512i __A)
{
- return (__m256i)__builtin_convertvector((__v8di) __A, __v8si);
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si) _mm256_undefined_si256 (),
+ (__mmask8) -1);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
- return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
- (__v8si)_mm512_cvtepi64_epi32(__A),
- (__v8si)__O);
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si) __O, __M);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
- return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
- (__v8si)_mm512_cvtepi64_epi32(__A),
- (__v8si)_mm256_setzero_si256());
+ return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+ (__v8si) _mm256_setzero_si256 (),
+ __M);
}
static __inline__ void __DEFAULT_FN_ATTRS
@@ -7513,7 +7518,9 @@ _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm512_cvtepi64_epi16 (__m512i __A)
{
- return (__m128i)__builtin_convertvector((__v8di)__A, __v8hi);
+ return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+ (__v8hi) _mm_undefined_si128 (),
+ (__mmask8) -1);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS
OpenPOWER on IntegriCloud