author | Craig Topper <craig.topper@intel.com> | 2019-01-28 07:03:10 +0000
---|---|---
committer | Craig Topper <craig.topper@intel.com> | 2019-01-28 07:03:10 +0000
commit | 07b6d3de1b7aefa3f2cb9c8cdc8b2db0a8b7f3fc (patch)
tree | 8d21ad44bb9d8eff85592539909df1791b1970a1 /clang/lib/CodeGen
parent | 453150bc18d74c2ebbef8f780b3b9d2e6278a529 (diff)
[X86] Add new variadic avx512 compress/expand intrinsics that use vXi1 types for the mask argument.
Custom lower the builtins to these intrinsics. This enables the middle end to optimize out bitcasts for the masks.
llvm-svn: 352344
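As context for the lowering described in the commit message, here is a minimal usage sketch (not part of the commit): it assumes an AVX-512F target (e.g. compiled with -mavx512f), that `_mm512_mask_compress_epi32` maps to one of the compress builtins handled below, and the function name `compress_even_lanes` is purely illustrative.

```cpp
#include <immintrin.h>

// Hypothetical example: pack the even-indexed 32-bit elements of `data` into
// the low lanes of the result; lanes not written keep the values from `src`.
// With this patch the __mmask16 argument is lowered to a <16 x i1> operand of
// the new llvm.x86.avx512.mask.compress intrinsic instead of being passed
// through an integer bitcast that the middle end would have to look through.
__m512i compress_even_lanes(__m512i src, __m512i data) {
  return _mm512_mask_compress_epi32(src, static_cast<__mmask16>(0x5555), data);
}
```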
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 54
1 file changed, 54 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2aa61ffc25a..195b0c078bd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9149,6 +9149,20 @@ static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
   return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
 }
 
+static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
+                                    ArrayRef<Value *> Ops,
+                                    bool IsCompress) {
+  llvm::Type *ResultTy = Ops[1]->getType();
+
+  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
+                                   ResultTy->getVectorNumElements());
+
+  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
+                                 : Intrinsic::x86_avx512_mask_expand;
+  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
+  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
+}
+
 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
                                    ArrayRef<Value *> Ops) {
   llvm::Type *ResultTy = Ops[1]->getType();
@@ -10160,6 +10174,46 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   case X86::BI__builtin_ia32_compressstoreqi512_mask:
     return EmitX86CompressStore(*this, Ops);
 
+  case X86::BI__builtin_ia32_expanddf128_mask:
+  case X86::BI__builtin_ia32_expanddf256_mask:
+  case X86::BI__builtin_ia32_expanddf512_mask:
+  case X86::BI__builtin_ia32_expandsf128_mask:
+  case X86::BI__builtin_ia32_expandsf256_mask:
+  case X86::BI__builtin_ia32_expandsf512_mask:
+  case X86::BI__builtin_ia32_expanddi128_mask:
+  case X86::BI__builtin_ia32_expanddi256_mask:
+  case X86::BI__builtin_ia32_expanddi512_mask:
+  case X86::BI__builtin_ia32_expandsi128_mask:
+  case X86::BI__builtin_ia32_expandsi256_mask:
+  case X86::BI__builtin_ia32_expandsi512_mask:
+  case X86::BI__builtin_ia32_expandhi128_mask:
+  case X86::BI__builtin_ia32_expandhi256_mask:
+  case X86::BI__builtin_ia32_expandhi512_mask:
+  case X86::BI__builtin_ia32_expandqi128_mask:
+  case X86::BI__builtin_ia32_expandqi256_mask:
+  case X86::BI__builtin_ia32_expandqi512_mask:
+    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
+
+  case X86::BI__builtin_ia32_compressdf128_mask:
+  case X86::BI__builtin_ia32_compressdf256_mask:
+  case X86::BI__builtin_ia32_compressdf512_mask:
+  case X86::BI__builtin_ia32_compresssf128_mask:
+  case X86::BI__builtin_ia32_compresssf256_mask:
+  case X86::BI__builtin_ia32_compresssf512_mask:
+  case X86::BI__builtin_ia32_compressdi128_mask:
+  case X86::BI__builtin_ia32_compressdi256_mask:
+  case X86::BI__builtin_ia32_compressdi512_mask:
+  case X86::BI__builtin_ia32_compresssi128_mask:
+  case X86::BI__builtin_ia32_compresssi256_mask:
+  case X86::BI__builtin_ia32_compresssi512_mask:
+  case X86::BI__builtin_ia32_compresshi128_mask:
+  case X86::BI__builtin_ia32_compresshi256_mask:
+  case X86::BI__builtin_ia32_compresshi512_mask:
+  case X86::BI__builtin_ia32_compressqi128_mask:
+  case X86::BI__builtin_ia32_compressqi256_mask:
+  case X86::BI__builtin_ia32_compressqi512_mask:
+    return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
+
   case X86::BI__builtin_ia32_gather3div2df:
   case X86::BI__builtin_ia32_gather3div2di:
   case X86::BI__builtin_ia32_gather3div4df:
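For readers unfamiliar with the operation being lowered, here is a scalar model of the masked compress semantics (an illustration only; `compress_model` and its fixed 16 x 32-bit element shape are assumptions, not code from the patch):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

// Scalar model of a masked compress over 16 x 32-bit elements: elements whose
// mask bit is set are packed contiguously into the low positions of the
// result; the remaining positions keep the passthrough (src) values.
std::array<uint32_t, 16> compress_model(const std::array<uint32_t, 16> &data,
                                         const std::array<uint32_t, 16> &passthru,
                                         uint16_t mask) {
  std::array<uint32_t, 16> result = passthru;
  std::size_t out = 0;
  for (std::size_t i = 0; i < 16; ++i)
    if (mask & (1u << i))
      result[out++] = data[i];
  return result;
}
```

Expand is the inverse operation: consecutive low elements of the source are scattered into the lanes whose mask bit is set, with the other lanes taken from the passthrough value.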