summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGBuiltin.cpp
diff options
context:
space:
mode:
authorGabor Buella <gabor.buella@intel.com>2018-06-22 11:59:16 +0000
committerGabor Buella <gabor.buella@intel.com>2018-06-22 11:59:16 +0000
commit716863c820db13af6f03c5858c2554f2fc8ac9c8 (patch)
treeb7ea08292cc4527bbbd9e9bc67b2798cdfb6cfe0 /clang/lib/CodeGen/CGBuiltin.cpp
parent2b772c11364077b6f7a84fdb733139c5846cad02 (diff)
downloadbcm5719-llvm-716863c820db13af6f03c5858c2554f2fc8ac9c8.tar.gz
bcm5719-llvm-716863c820db13af6f03c5858c2554f2fc8ac9c8.zip
[X86] Lower _mm[256|512]_cmp[.]_mask intrinsics to native llvm IR
Summary: Lowering some vector comparision builtins to fcmp IR instructions. This ignores the signaling behaviour specified in the predicate argument of said builtins. Affected AVX512 builtins: __builtin_ia32_cmpps128_mask __builtin_ia32_cmpps256_mask __builtin_ia32_cmpps512_mask __builtin_ia32_cmppd128_mask __builtin_ia32_cmppd256_mask __builtin_ia32_cmppd512_mask Reviewers: craig.topper, uriel.k, RKSimon, andrew.w.kaylor, spatel, scanon, efriedma Reviewed By: craig.topper, spatel, efriedma Differential Revision: https://reviews.llvm.org/D45616 llvm-svn: 335339
Diffstat (limited to 'clang/lib/CodeGen/CGBuiltin.cpp')
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp165
1 files changed, 74 insertions, 91 deletions
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b9430e6fc9a..de96127987f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10120,44 +10120,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateExtractValue(Call, 1);
}
- case X86::BI__builtin_ia32_cmpps128_mask:
- case X86::BI__builtin_ia32_cmpps256_mask:
- case X86::BI__builtin_ia32_cmpps512_mask:
- case X86::BI__builtin_ia32_cmppd128_mask:
- case X86::BI__builtin_ia32_cmppd256_mask:
- case X86::BI__builtin_ia32_cmppd512_mask: {
- unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
- Value *MaskIn = Ops[3];
- Ops.erase(&Ops[3]);
-
- Intrinsic::ID ID;
- switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpps128_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_ps_128;
- break;
- case X86::BI__builtin_ia32_cmpps256_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_ps_256;
- break;
- case X86::BI__builtin_ia32_cmpps512_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_ps_512;
- break;
- case X86::BI__builtin_ia32_cmppd128_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_pd_128;
- break;
- case X86::BI__builtin_ia32_cmppd256_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_pd_256;
- break;
- case X86::BI__builtin_ia32_cmppd512_mask:
- ID = Intrinsic::x86_avx512_mask_cmp_pd_512;
- break;
- }
-
- Value *Cmp = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
- return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
- }
-
- // SSE packed comparison intrinsics
+ // packed comparison intrinsics
case X86::BI__builtin_ia32_cmpeqps:
case X86::BI__builtin_ia32_cmpeqpd:
return getVectorFCmpIR(CmpInst::FCMP_OEQ);
@@ -10185,64 +10148,84 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_cmpps:
case X86::BI__builtin_ia32_cmpps256:
case X86::BI__builtin_ia32_cmppd:
- case X86::BI__builtin_ia32_cmppd256: {
+ case X86::BI__builtin_ia32_cmppd256:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask: {
+ // Lowering vector comparisons to fcmp instructions, while
+ // ignoring signalling behaviour requested
+ // ignoring rounding mode requested
+ // This is is only possible as long as FENV_ACCESS is not implemented.
+ // See also: https://reviews.llvm.org/D45616
+
+ // The third argument is the comparison condition, and integer in the
+ // range [0, 31]
unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
- // If this one of the SSE immediates, we can use native IR.
- if (CC < 8) {
- FCmpInst::Predicate Pred;
- switch (CC) {
- case 0: Pred = FCmpInst::FCMP_OEQ; break;
- case 1: Pred = FCmpInst::FCMP_OLT; break;
- case 2: Pred = FCmpInst::FCMP_OLE; break;
- case 3: Pred = FCmpInst::FCMP_UNO; break;
- case 4: Pred = FCmpInst::FCMP_UNE; break;
- case 5: Pred = FCmpInst::FCMP_UGE; break;
- case 6: Pred = FCmpInst::FCMP_UGT; break;
- case 7: Pred = FCmpInst::FCMP_ORD; break;
- }
- return getVectorFCmpIR(Pred);
+
+ // Lowering to IR fcmp instruction.
+ // Ignoring requested signaling behaviour,
+ // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
+ FCmpInst::Predicate Pred;
+ switch (CC) {
+ case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x01: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x02: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x03: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x04: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x05: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x06: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x07: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x09: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
+ case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
+ case 0x11: Pred = FCmpInst::FCMP_OLT; break;
+ case 0x12: Pred = FCmpInst::FCMP_OLE; break;
+ case 0x13: Pred = FCmpInst::FCMP_UNO; break;
+ case 0x14: Pred = FCmpInst::FCMP_UNE; break;
+ case 0x15: Pred = FCmpInst::FCMP_UGE; break;
+ case 0x16: Pred = FCmpInst::FCMP_UGT; break;
+ case 0x17: Pred = FCmpInst::FCMP_ORD; break;
+ case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
+ case 0x19: Pred = FCmpInst::FCMP_ULT; break;
+ case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
+ case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
+ case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
+ case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
+ // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
+ // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
+ case 0x0b: // FALSE_OQ
+ case 0x1b: // FALSE_OS
+ return llvm::Constant::getNullValue(ConvertType(E->getType()));
+ case 0x0f: // TRUE_UQ
+ case 0x1f: // TRUE_US
+ return llvm::Constant::getAllOnesValue(ConvertType(E->getType()));
+
+ default: llvm_unreachable("Unhandled CC");
}
- // We can't handle 8-31 immediates with native IR, use the intrinsic.
- // Except for predicates that create constants.
- Intrinsic::ID ID;
+ // Builtins without the _mask suffix return a vector of integers
+ // of the same width as the input vectors
switch (BuiltinID) {
- default: llvm_unreachable("Unsupported intrinsic!");
- case X86::BI__builtin_ia32_cmpps:
- ID = Intrinsic::x86_sse_cmp_ps;
- break;
- case X86::BI__builtin_ia32_cmpps256:
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
- Value *Constant = (CC == 0xf || CC == 0x1f) ?
- llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
- llvm::Constant::getNullValue(Builder.getInt32Ty());
- Value *Vec = Builder.CreateVectorSplat(
- Ops[0]->getType()->getVectorNumElements(), Constant);
- return Builder.CreateBitCast(Vec, Ops[0]->getType());
- }
- ID = Intrinsic::x86_avx_cmp_ps_256;
- break;
- case X86::BI__builtin_ia32_cmppd:
- ID = Intrinsic::x86_sse2_cmp_pd;
- break;
- case X86::BI__builtin_ia32_cmppd256:
- // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
- // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
- if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
- Value *Constant = (CC == 0xf || CC == 0x1f) ?
- llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
- llvm::Constant::getNullValue(Builder.getInt64Ty());
- Value *Vec = Builder.CreateVectorSplat(
- Ops[0]->getType()->getVectorNumElements(), Constant);
- return Builder.CreateBitCast(Vec, Ops[0]->getType());
- }
- ID = Intrinsic::x86_avx_cmp_pd_256;
- break;
+ case X86::BI__builtin_ia32_cmpps512_mask:
+ case X86::BI__builtin_ia32_cmppd512_mask:
+ case X86::BI__builtin_ia32_cmpps128_mask:
+ case X86::BI__builtin_ia32_cmpps256_mask:
+ case X86::BI__builtin_ia32_cmppd128_mask:
+ case X86::BI__builtin_ia32_cmppd256_mask: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+ return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
+ }
+ default:
+ return getVectorFCmpIR(Pred);
}
-
- return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
}
// SSE scalar comparison intrinsics
OpenPOWER on IntegriCloud