summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-05-21 20:58:23 +0000
committerCraig Topper <craig.topper@intel.com>2018-05-21 20:58:23 +0000
commit288bd2e5a082897be30a050bd14d387ae5eb8208 (patch)
treeed215159d11d54d6c826fdb5641ed164246b07c7 /clang/lib/CodeGen
parentaad3aefaeba7574531aeb125f6d148cdeee83057 (diff)
downloadbcm5719-llvm-288bd2e5a082897be30a050bd14d387ae5eb8208.tar.gz
bcm5719-llvm-288bd2e5a082897be30a050bd14d387ae5eb8208.zip
[X86] Remove masking from pternlog llvm intrinsics and use a select instruction instead.
Because the intrinsics in the headers are implemented as macros, we can't just use a select builtin and pternlog builtin. This would require one of the macro arguments to be used twice. Depending on what was passed to the macro we could expand an expression twice leading to weird behavior. We could maybe declare our local variable in the macro, but that would need to worry about name collisions. To avoid that just generate IR directly in CGBuiltin.cpp. Differential Revision: https://reviews.llvm.org/D47125 llvm-svn: 332891
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp47
1 files changed, 47 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 52204b4103a..c6f0b30bc76 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8445,6 +8445,37 @@ static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
return CGF.Builder.CreateMul(LHS, RHS);
}
+// Emit a masked pternlog intrinsic. This only exists because the header has to
+// use a macro and we aren't able to pass the input argument to a pternlog
+// builtin and a select builtin without evaluating it twice.
+static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
+ ArrayRef<Value *> Ops) {
+ llvm::Type *Ty = Ops[0]->getType();
+
+ unsigned VecWidth = Ty->getPrimitiveSizeInBits();
+ unsigned EltWidth = Ty->getScalarSizeInBits();
+ Intrinsic::ID IID;
+ if (VecWidth == 128 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_128;
+ else if (VecWidth == 256 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_256;
+ else if (VecWidth == 512 && EltWidth == 32)
+ IID = Intrinsic::x86_avx512_pternlog_d_512;
+ else if (VecWidth == 128 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_128;
+ else if (VecWidth == 256 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_256;
+ else if (VecWidth == 512 && EltWidth == 64)
+ IID = Intrinsic::x86_avx512_pternlog_q_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
+ Ops.drop_back());
+ Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
+ return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
+}
+
static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
llvm::Type *DstTy) {
unsigned NumberOfElements = DstTy->getVectorNumElements();
@@ -9159,6 +9190,22 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_pmuldq512:
return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
+ case X86::BI__builtin_ia32_pternlogd512_mask:
+ case X86::BI__builtin_ia32_pternlogq512_mask:
+ case X86::BI__builtin_ia32_pternlogd128_mask:
+ case X86::BI__builtin_ia32_pternlogd256_mask:
+ case X86::BI__builtin_ia32_pternlogq128_mask:
+ case X86::BI__builtin_ia32_pternlogq256_mask:
+ return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
+
+ case X86::BI__builtin_ia32_pternlogd512_maskz:
+ case X86::BI__builtin_ia32_pternlogq512_maskz:
+ case X86::BI__builtin_ia32_pternlogd128_maskz:
+ case X86::BI__builtin_ia32_pternlogd256_maskz:
+ case X86::BI__builtin_ia32_pternlogq128_maskz:
+ case X86::BI__builtin_ia32_pternlogq256_maskz:
+ return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
+
// 3DNow!
case X86::BI__builtin_ia32_pswapdsf:
case X86::BI__builtin_ia32_pswapdsi: {
OpenPOWER on IntegriCloud