From 44ace925963db17ee1616d44ae6889e23e0424ef Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Tue, 18 Dec 2018 07:53:26 +0000 Subject: [PowerPC] Exploit power9 new instruction setb Check the expected patterns feeding to SELECT_CC like: (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) Further transform the sequence to comparison + setb if it matches. Differential Revision: https://reviews.llvm.org/D53275 llvm-svn: 349445 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 167 ++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) (limited to 'llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp') diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6fb1b881d17..4f05f50f699 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -81,6 +81,8 @@ STATISTIC(NumLogicOpsOnComparison, "Number of logical ops on i1 values calculated in GPR."); STATISTIC(OmittedForNonExtendUses, "Number of compares not eliminated as they have non-extending uses."); +STATISTIC(NumP9Setb, + "Number of compares lowered to setb."); // FIXME: Remove this once the bug has been fixed! 
cl::opt ANDIGlueBug("expose-ppc-andi-glue-bug",
@@ -4137,6 +4139,175 @@ void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
   CurDAG->setNodeMemRefs(cast(Result), {MemOp});
 }
 
+// mayUseP9Setb - Return true if the SELECT_CC node N is a three-way
+// comparison of its two operands (producing -1, 0 or 1) that can be lowered to
+// one comparison followed by the Power9 setb instruction.
+//
+// setb yields -1 if the LT bit of the compared CR field is set, 1 if the GT
+// bit is set and 0 otherwise, so the outer and the inner comparison must order
+// the same two operands with the same signedness.
+//
+// CC is the condition code of N (cc1 in the patterns below). On success,
+// NeedSwapOps tells the caller whether the operands of N have to be swapped
+// before the comparison is emitted, and IsUnCmp whether that comparison has to
+// be unsigned. IsUnCmp is expected to be false on entry and may already have
+// been set when false is returned. DAG is currently unused.
+static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
+                         bool &NeedSwapOps, bool &IsUnCmp) {
+  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue TrueRes = N->getOperand(2);
+  SDValue FalseRes = N->getOperand(3);
+  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
+  // The caller selects SETB8 for i64 results and SETB otherwise, so only
+  // i64/i32 results are supported. Reject every other result type instead of
+  // asserting, so such nodes fall through to the caller's remaining SELECT_CC
+  // handling.
+  if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
+                     N->getSimpleValueType(0) != MVT::i32))
+    return false;
+
+  // We are looking for any of:
+  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
+  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
+  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
+  int64_t TrueResVal = TrueConst->getSExtValue();
+  if ((TrueResVal < -1 || TrueResVal > 1) ||
+      (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
+      (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
+      (TrueResVal == 0 &&
+       (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
+    return false;
+
+  bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
+  SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
+  // A sext/zext has to wrap a SETCC. Any other operand, including a SELECT_CC
+  // hidden behind the extension, does not keep its condition code at operand
+  // 2 and would be misread below.
+  if (!InnerIsSel && SetOrSelCC.getOpcode() != ISD::SETCC)
+    return false;
+
+  // Without this setb optimization, the outer SELECT_CC will be manually
+  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
+  // transforms pseudo instruction to isel instruction. When there are more than
+  // one use for result like zext/sext, with current optimization we only see
+  // isel is replaced by setb but can't see any significant gain. Since
+  // setb has longer latency than original isel, we should avoid this. Another
+  // point is that setb requires comparison always kept, it can break the
+  // opportunity to get the comparison away if we have in future.
+  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
+    return false;
+
+  SDValue InnerLHS = SetOrSelCC.getOperand(0);
+  SDValue InnerRHS = SetOrSelCC.getOperand(1);
+  // The condition code is operand 4 of a SELECT_CC and operand 2 of a SETCC.
+  ISD::CondCode InnerCC =
+      cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
+  // If the inner comparison is a select_cc, make sure the true/false values are
+  // 1/-1 and canonicalize it if needed.
+  if (InnerIsSel) {
+    ConstantSDNode *SelCCTrueConst =
+        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
+    ConstantSDNode *SelCCFalseConst =
+        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
+    if (!SelCCTrueConst || !SelCCFalseConst)
+      return false;
+    int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
+    int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
+    // The values must be -1/1 (requiring a swap) or 1/-1.
+    if (SelCCTVal == -1 && SelCCFVal == 1) {
+      std::swap(InnerLHS, InnerRHS);
+    } else if (SelCCTVal != 1 || SelCCFVal != -1)
+      return false;
+  }
+
+  // Canonicalize the unsigned case, remembering that the inner comparison was
+  // unsigned so it can be checked against the signedness of the outer one.
+  bool InnerIsUnsigned = false;
+  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
+    InnerIsUnsigned = true;
+    IsUnCmp = true;
+    InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
+  }
+
+  // InnerSwapped records whether the inner comparison uses the operands of N
+  // in reverse order.
+  bool InnerSwapped = false;
+  if (LHS == InnerRHS && RHS == InnerLHS)
+    InnerSwapped = true;
+  else if (LHS != InnerLHS || RHS != InnerRHS)
+    return false;
+
+  switch (CC) {
+  // (select_cc lhs, rhs, 0,
+  //   (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
+  case ISD::SETEQ:
+    if (!InnerIsSel)
+      return false;
+    if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
+      return false;
+    NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
+    break;
+
+  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
+  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
+  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
+  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
+  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
+  case ISD::SETULT:
+    if (!IsUnCmp && InnerCC != ISD::SETNE)
+      return false;
+    IsUnCmp = true;
+    LLVM_FALLTHROUGH;
+  case ISD::SETLT:
+    // A signed outer comparison cannot share a single compare with an
+    // unsigned inner setult/setugt.
+    if (CC == ISD::SETLT && InnerIsUnsigned)
+      return false;
+    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
+        (InnerCC == ISD::SETLT && InnerSwapped))
+      NeedSwapOps = (TrueResVal == 1);
+    else
+      return false;
+    break;
+
+  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
+  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
+  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
+  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
+  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
+  case ISD::SETUGT:
+    if (!IsUnCmp && InnerCC != ISD::SETNE)
+      return false;
+    IsUnCmp = true;
+    LLVM_FALLTHROUGH;
+  case ISD::SETGT:
+    // A signed outer comparison cannot share a single compare with an
+    // unsigned inner setult/setugt.
+    if (CC == ISD::SETGT && InnerIsUnsigned)
+      return false;
+    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
+        (InnerCC == ISD::SETGT && InnerSwapped))
+      NeedSwapOps = (TrueResVal == -1);
+    else
+      return false;
+    break;
+
+  default:
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
+  LLVM_DEBUG(N->dump());
+
+  return true;
+}
+
 // Select - Convert the specified operand from a target-independent to a
 // target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) { @@ -4599,6 +4741,31 @@ void PPCDAGToDAGISel::Select(SDNode *N) { N->getOperand(0).getValueType() == MVT::i1) break; + if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) { + bool NeedSwapOps = false; + bool IsUnCmp = false; + if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (NeedSwapOps) + std::swap(LHS, RHS); + + // Make use of SelectCC to generate the comparison to set CR bits, for + // equality comparisons having one literal operand, SelectCC probably + // doesn't need to materialize the whole literal and just use xoris to + // check it first, it leads the following comparison result can't + // exactly represent GT/LT relationship. So to avoid this we specify + // SETGT/SETUGT here instead of SETEQ. + SDValue GenCC = + SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl); + CurDAG->SelectNodeTo( + N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB, + N->getValueType(0), GenCC); + NumP9Setb++; + return; + } + } + // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc if (!isPPC64) if (ConstantSDNode *N1C = dyn_cast(N->getOperand(1))) -- cgit v1.2.3