diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-06-08 21:50:07 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-06-08 21:50:07 +0000 |
| commit | 5f50f338064295d9b3f9b5d9ed73f4601d2e226f (patch) | |
| tree | c871a21cd48149b788211f83b8df6acc378436ce /clang/lib/CodeGen | |
| parent | 5a780ee9a91ff4da9230e77df598a7213a689c86 (diff) | |
| download | bcm5719-llvm-5f50f338064295d9b3f9b5d9ed73f4601d2e226f.tar.gz bcm5719-llvm-5f50f338064295d9b3f9b5d9ed73f4601d2e226f.zip | |
[X86] Fold masking into subvector extract builtins.
I'm looking into making the select builtins require avx512f, avx512bw, or avx512vl since masking operations generally require those features.
The extract builtins are unusual because the 512-bit versions return a 128-bit or 256-bit vector with masking even when avx512vl is not supported.
llvm-svn: 334330
Diffstat (limited to 'clang/lib/CodeGen')
| -rw-r--r-- | clang/lib/CodeGen/CGBuiltin.cpp | 37 |
1 file changed, 21 insertions, 16 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ac9f46cb449..012428dbb28 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9239,18 +9239,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vextractf128_ps256: case X86::BI__builtin_ia32_vextractf128_si256: case X86::BI__builtin_ia32_extract128i256: - case X86::BI__builtin_ia32_extractf64x4: - case X86::BI__builtin_ia32_extractf32x4: - case X86::BI__builtin_ia32_extracti64x4: - case X86::BI__builtin_ia32_extracti32x4: - case X86::BI__builtin_ia32_extractf32x8: - case X86::BI__builtin_ia32_extracti32x8: - case X86::BI__builtin_ia32_extractf32x4_256: - case X86::BI__builtin_ia32_extracti32x4_256: - case X86::BI__builtin_ia32_extractf64x2_256: - case X86::BI__builtin_ia32_extracti64x2_256: - case X86::BI__builtin_ia32_extractf64x2_512: - case X86::BI__builtin_ia32_extracti64x2_512: { + case X86::BI__builtin_ia32_extractf64x4_mask: + case X86::BI__builtin_ia32_extractf32x4_mask: + case X86::BI__builtin_ia32_extracti64x4_mask: + case X86::BI__builtin_ia32_extracti32x4_mask: + case X86::BI__builtin_ia32_extractf32x8_mask: + case X86::BI__builtin_ia32_extracti32x8_mask: + case X86::BI__builtin_ia32_extractf32x4_256_mask: + case X86::BI__builtin_ia32_extracti32x4_256_mask: + case X86::BI__builtin_ia32_extractf64x2_256_mask: + case X86::BI__builtin_ia32_extracti64x2_256_mask: + case X86::BI__builtin_ia32_extractf64x2_512_mask: + case X86::BI__builtin_ia32_extracti64x2_512_mask: { llvm::Type *DstTy = ConvertType(E->getType()); unsigned NumElts = DstTy->getVectorNumElements(); unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue() * NumElts; @@ -9259,10 +9259,15 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + Index; - return Builder.CreateShuffleVector(Ops[0], - UndefValue::get(Ops[0]->getType()), - makeArrayRef(Indices, NumElts), - 
"extract"); + Value *Res = Builder.CreateShuffleVector(Ops[0], + UndefValue::get(Ops[0]->getType()), + makeArrayRef(Indices, NumElts), + "extract"); + + if (Ops.size() == 4) + Res = EmitX86Select(*this, Ops[3], Res, Ops[2]); + + return Res; } case X86::BI__builtin_ia32_vinsertf128_pd256: case X86::BI__builtin_ia32_vinsertf128_ps256: |

