From 07b6d3de1b7aefa3f2cb9c8cdc8b2db0a8b7f3fc Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 28 Jan 2019 07:03:10 +0000
Subject: [X86] Add new variadic avx512 compress/expand intrinsics that use
 vXi1 types for the mask argument. Custom lower the builtins to these
 intrinsics.

This enables the middle end to optimize out bitcasts for the masks.

llvm-svn: 352344
---
 clang/lib/CodeGen/CGBuiltin.cpp | 54 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'clang/lib/CodeGen')

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2aa61ffc25a..195b0c078bd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9149,6 +9149,20 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
   return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
 }
 
+static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
+                                    ArrayRef<Value *> Ops,
+                                    bool IsCompress) {
+  llvm::Type *ResultTy = Ops[1]->getType();
+
+  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+                                   ResultTy->getVectorNumElements());
+
+  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
+                                 : Intrinsic::x86_avx512_mask_expand;
+  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
+  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
+}
+
 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
                                    ArrayRef<Value *> Ops) {
   llvm::Type *ResultTy = Ops[1]->getType();
@@ -10160,6 +10174,46 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_compressstoreqi512_mask:
     return EmitX86CompressStore(*this, Ops);
 
+  case X86::BI__builtin_ia32_expanddf128_mask:
+  case X86::BI__builtin_ia32_expanddf256_mask:
+  case X86::BI__builtin_ia32_expanddf512_mask:
+  case X86::BI__builtin_ia32_expandsf128_mask:
+  case X86::BI__builtin_ia32_expandsf256_mask:
+  case X86::BI__builtin_ia32_expandsf512_mask:
+  case X86::BI__builtin_ia32_expanddi128_mask:
+  case X86::BI__builtin_ia32_expanddi256_mask:
+  case X86::BI__builtin_ia32_expanddi512_mask:
+  case X86::BI__builtin_ia32_expandsi128_mask:
+  case X86::BI__builtin_ia32_expandsi256_mask:
+  case X86::BI__builtin_ia32_expandsi512_mask:
+  case X86::BI__builtin_ia32_expandhi128_mask:
+  case X86::BI__builtin_ia32_expandhi256_mask:
+  case X86::BI__builtin_ia32_expandhi512_mask:
+  case X86::BI__builtin_ia32_expandqi128_mask:
+  case X86::BI__builtin_ia32_expandqi256_mask:
+  case X86::BI__builtin_ia32_expandqi512_mask:
+    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
+
+  case X86::BI__builtin_ia32_compressdf128_mask:
+  case X86::BI__builtin_ia32_compressdf256_mask:
+  case X86::BI__builtin_ia32_compressdf512_mask:
+  case X86::BI__builtin_ia32_compresssf128_mask:
+  case X86::BI__builtin_ia32_compresssf256_mask:
+  case X86::BI__builtin_ia32_compresssf512_mask:
+  case X86::BI__builtin_ia32_compressdi128_mask:
+  case X86::BI__builtin_ia32_compressdi256_mask:
+  case X86::BI__builtin_ia32_compressdi512_mask:
+  case X86::BI__builtin_ia32_compresssi128_mask:
+  case X86::BI__builtin_ia32_compresssi256_mask:
+  case X86::BI__builtin_ia32_compresssi512_mask:
+  case X86::BI__builtin_ia32_compresshi128_mask:
+  case X86::BI__builtin_ia32_compresshi256_mask:
+  case X86::BI__builtin_ia32_compresshi512_mask:
+  case X86::BI__builtin_ia32_compressqi128_mask:
+  case X86::BI__builtin_ia32_compressqi256_mask:
+  case X86::BI__builtin_ia32_compressqi512_mask:
+    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
+
   case X86::BI__builtin_ia32_gather3div2df:
   case X86::BI__builtin_ia32_gather3div2di:
   case X86::BI__builtin_ia32_gather3div4df:
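
Illustrative note (not part of the commit above): a minimal sketch of the IR
this lowering is expected to produce for one of the affected builtins. It
assumes clang's usual operand order for the compress builtins (data,
pass-through, mask) and LLVM's standard name mangling for overloaded
intrinsics; value names and the exact IR shape are approximate.

  #include <immintrin.h>

  // Compile with something like: clang -mavx512f -mavx512vl -O1 -emit-llvm -S
  __m256i compress_demo(__m256i src, __mmask8 k, __m256i a) {
    // Lowers through __builtin_ia32_compresssi256_mask, which the change
    // above routes into EmitX86CompressExpand(..., /*IsCompress*/true).
    return _mm256_mask_compress_epi32(src, k, a);
  }

  // Approximate IR after this change: getMaskVecValue turns the i8 mask into
  // an <8 x i1> vector that feeds the new intrinsic directly, so the middle
  // end sees a per-lane mask rather than an opaque integer:
  //
  //   %mask = bitcast i8 %k to <8 x i1>
  //   %res  = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(
  //               <8 x i32> %a, <8 x i32> %src, <8 x i1> %mask)

The expand builtins take the same path with /*IsCompress*/false, selecting
Intrinsic::x86_avx512_mask_expand instead.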