[PowerPC] Recommit r314244 with refactoring and off by default

This re-commits everything that was pulled in r314244. The transformation is off by default (patch to enable it to follow). The code is refactored to have a single entry-point and provide fine-grained control over patterns that it selects. This patch also fixes the bugs in the original code. Everything that failed with the original patch has been re-tested with this patch (with the transformation turned on). So the patch to turn this on is soon to follow. Differential Revision: https://reviews.llvm.org/D38575 llvm-svn: 319434
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2017-11-30 13:39:10 +0000
committer: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2017-11-30 13:39:10 +0000
commit: db7e77047c2432742dd1dd26f3cc96ee180c9c34 (patch)
tree: ab15236c0d74712dec35f23b60f5a42b2fa65c26 /llvm/lib
parent: bb791b3dbdc009412c1a27b14b22091b15d3728b (diff)
download: bcm5719-llvm-db7e77047c2432742dd1dd26f3cc96ee180c9c34.tar.gz
bcm5719-llvm-db7e77047c2432742dd1dd26f3cc96ee180c9c34.zip
1 files changed, 1236 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6289765c6b8..c53549d8e03 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -69,6 +69,19 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppc-codegen"
 
+STATISTIC(NumSextSetcc,
+          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
+STATISTIC(NumZextSetcc,
+          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
+STATISTIC(SignExtensionsAdded,
+          "Number of sign extensions for compare inputs added.");
+STATISTIC(ZeroExtensionsAdded,
+          "Number of zero extensions for compare inputs added.");
+STATISTIC(NumLogicOpsOnComparison,
+          "Number of logical ops on i1 values calculated in GPR.");
+STATISTIC(OmittedForNonExtendUses,
+          "Number of compares not eliminated as they have non-extending uses.");
+
 // FIXME: Remove this once the bug has been fixed!
 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
@@ -88,6 +101,29 @@ static cl::opt<bool> EnableBranchHint(
     cl::desc("Enable static hinting of branches on ppc"),
     cl::Hidden);
 
+enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
+  ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
+  ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
+
+static cl::opt<ICmpInGPRType> CmpInGPR(
+  "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_None),
+  cl::desc("Specify the types of comparisons to emit GPR-only code for."),
+  cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
+             clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
+             clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
+             clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
+             clEnumValN(ICGPR_NonExtIn, "nonextin",
+                        "Only comparisons where inputs don't need [sz]ext."),
+             clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
+             clEnumValN(ICGPR_ZextI32, "zexti32",
+                        "Only i32 comparisons with zext result."),
+             clEnumValN(ICGPR_ZextI64, "zexti64",
+                        "Only i64 comparisons with zext result."),
+             clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
+             clEnumValN(ICGPR_SextI32, "sexti32",
+                        "Only i32 comparisons with sext result."),
+             clEnumValN(ICGPR_SextI64, "sexti64",
+                        "Only i64 comparisons with sext result.")));
 namespace {
 
   //===--------------------------------------------------------------------===//
@@ -161,6 +197,7 @@ namespace {
 
     bool tryBitfieldInsert(SDNode *N);
     bool tryBitPermutation(SDNode *N);
+    bool tryIntCompareInGPR(SDNode *N);
 
     /// SelectCC - Select a comparison of the specified values with the
     /// specified condition code, returning the CR# of the expression.
@@ -2064,8 +2101,1203 @@ public:
   }
 };
 
+class IntegerCompareEliminator {
+  SelectionDAG *CurDAG;
+  PPCDAGToDAGISel *S;
+  // Conversion type for interpreting results of a 32-bit instruction as
+  // a 64-bit value or vice versa.
+  enum ExtOrTruncConversion { Ext, Trunc };
+
+  // Modifiers to guide how an ISD::SETCC node's result is to be computed
+  // in a GPR.
+  // ZExtOrig - use the original condition code, zero-extend value
+  // ZExtInvert - invert the condition code, zero-extend value
+  // SExtOrig - use the original condition code, sign-extend value
+  // SExtInvert - invert the condition code, sign-extend value
+  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
+
+  // Comparisons against zero to emit GPR code sequences for. Each of these
+  // sequences may need to be emitted for two or more equivalent patterns.
+  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
+  // matters as well as the extension type: sext (-1/0), zext (1/0).
+  // GEZExt - (zext (LHS >= 0))
+  // GESExt - (sext (LHS >= 0))
+  // LEZExt - (zext (LHS <= 0))
+  // LESExt - (sext (LHS <= 0))
+  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
+
+  SDNode *tryEXTEND(SDNode *N);
+  SDNode *tryLogicOpOfCompares(SDNode *N);
+  SDValue computeLogicOpInGPR(SDValue LogicOp);
+  SDValue signExtendInputIfNeeded(SDValue Input);
+  SDValue zeroExtendInputIfNeeded(SDValue Input);
+  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
+  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
+                                        ZeroCompare CmpTy);
+  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+                              int64_t RHSValue, SDLoc dl);
+ SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+                              int64_t RHSValue, SDLoc dl);
+  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+                              int64_t RHSValue, SDLoc dl);
+  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+                              int64_t RHSValue, SDLoc dl);
+  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
+
+public:
+  IntegerCompareEliminator(SelectionDAG *DAG,
+                           PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
+    assert(CurDAG->getTargetLoweringInfo()
+           .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
+           "Only expecting to use this on 64 bit targets.");
+  }
+  SDNode *Select(SDNode *N) {
+    if (CmpInGPR == ICGPR_None)
+      return nullptr;
+    switch (N->getOpcode()) {
+    default: break;
+    case ISD::ZERO_EXTEND:
+      if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
+          CmpInGPR == ICGPR_SextI64)
+        return nullptr;
+    case ISD::SIGN_EXTEND:
+      if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
+          CmpInGPR == ICGPR_ZextI64)
+        return nullptr;
+      return tryEXTEND(N);
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR:
+      return tryLogicOpOfCompares(N);
+    }
+    return nullptr;
+  }
+};
+
+static bool isLogicOp(unsigned Opc) {
+  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
+}
+// The obvious case for wanting to keep the value in a GPR. Namely, the
+// result of the comparison is actually needed in a GPR.
+SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
+  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
+          N->getOpcode() == ISD::SIGN_EXTEND) &&
+         "Expecting a zero/sign extend node!");
+  SDValue WideRes;
+  // If we are zero-extending the result of a logical operation on i1
+  // values, we can keep the values in GPRs.
+  if (isLogicOp(N->getOperand(0).getOpcode()) &&
+      N->getOperand(0).getValueType() == MVT::i1 &&
+      N->getOpcode() == ISD::ZERO_EXTEND)
+    WideRes = computeLogicOpInGPR(N->getOperand(0));
+  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
+    return nullptr;
+  else
+    WideRes =
+      getSETCCInGPR(N->getOperand(0),
+                    N->getOpcode() == ISD::SIGN_EXTEND ?
+                    SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
+
+  if (!WideRes)
+    return nullptr;
+
+  SDLoc dl(N);
+  bool Input32Bit = WideRes.getValueType() == MVT::i32;
+  bool Output32Bit = N->getValueType(0) == MVT::i32;
+
+  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
+  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
+
+  SDValue ConvOp = WideRes;
+  if (Input32Bit != Output32Bit)
+    ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
+                           ExtOrTruncConversion::Trunc);
+  return ConvOp.getNode();
+}
+
+// Attempt to perform logical operations on the results of comparisons while
+// keeping the values in GPRs. Without doing so, these would end up being
+// lowered to CR-logical operations which suffer from significant latency and
+// low ILP.
+SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
+  if (N->getValueType(0) != MVT::i1)
+    return nullptr;
+  assert(isLogicOp(N->getOpcode()) &&
+         "Expected a logic operation on setcc results.");
+  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
+  if (!LoweredLogical)
+    return nullptr;
+
+  SDLoc dl(N);
+  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
+  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
+  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
+  SDValue LHS = LoweredLogical.getOperand(0);
+  SDValue RHS = LoweredLogical.getOperand(1);
+  SDValue WideOp;
+  SDValue OpToConvToRecForm;
+
+  // Look through any 32-bit to 64-bit implicit extend nodes to find the
+  // opcode that is input to the XORI.
+  if (IsBitwiseNegate &&
+      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
+    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
+  else if (IsBitwiseNegate)
+    // If the input to the XORI isn't an extension, that's what we're after.
+    OpToConvToRecForm = LoweredLogical.getOperand(0);
+  else
+    // If this is not an XORI, it is a reg-reg logical op and we can convert
+    // it to record-form.
+    OpToConvToRecForm = LoweredLogical;
+
+  // Get the record-form version of the node we're looking to use to get the
+  // CR result from.
+  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
+  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
+
+  // Convert the right node to record-form. This is either the logical we're
+  // looking at or it is the input node to the negation (if we're looking at
+  // a bitwise negation).
+  if (NewOpc != -1 && IsBitwiseNegate) {
+    // The input to the XORI has a record-form. Use it.
+    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
+           "Expected a PPC::XORI8 only for bitwise negation.");
+    // Emit the record-form instruction.
+    std::vector<SDValue> Ops;
+    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
+      Ops.push_back(OpToConvToRecForm.getOperand(i));
+
+    WideOp =
+      SDValue(CurDAG->getMachineNode(NewOpc, dl,
+                                     OpToConvToRecForm.getValueType(),
+                                     MVT::Glue, Ops), 0);
+  } else {
+    assert((NewOpc != -1 || !IsBitwiseNegate) &&
+           "No record form available for AND8/OR8/XOR8?");
+    WideOp =
+      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
+                                     MVT::i64, MVT::Glue, LHS, RHS), 0);
+  }
+
+  // Select this node to a single bit from CR0 set by the record-form node
+  // just created. For bitwise negation, use the EQ bit which is the equivalent
+  // of negating the result (i.e. it is a bit set when the result of the
+  // operation is zero).
+  SDValue SRIdxVal =
+    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
+  SDValue CRBit =
+    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
+                                   MVT::i1, CR0Reg, SRIdxVal,
+                                   WideOp.getValue(1)), 0);
+  return CRBit.getNode();
+}
+
+// Lower a logical operation on i1 values into a GPR sequence if possible.
+// The result can be kept in a GPR if requested.
+// Three types of inputs can be handled:
+// - SETCC
+// - TRUNCATE
+// - Logical operation (AND/OR/XOR)
+// There is also a special case that is handled (namely a complement operation
+// achieved with xor %a, -1).
+SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
+  assert(isLogicOp(LogicOp.getOpcode()) &&
+        "Can only handle logic operations here.");
+  assert(LogicOp.getValueType() == MVT::i1 &&
+         "Can only handle logic operations on i1 values here.");
+  SDLoc dl(LogicOp);
+  SDValue LHS, RHS;
+
+ // Special case: xor %a, -1
+  bool IsBitwiseNegation = isBitwiseNot(LogicOp);
+
+  // Produces a GPR sequence for each operand of the binary logic operation.
+  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
+  // the value in a GPR and for logic operations, it will recursively produce
+  // a GPR sequence for the operation.
+ auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
+    unsigned OperandOpcode = Operand.getOpcode();
+    if (OperandOpcode == ISD::SETCC)
+      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
+    else if (OperandOpcode == ISD::TRUNCATE) {
+      SDValue InputOp = Operand.getOperand(0);
+     EVT InVT = InputOp.getValueType();
+      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
+                                            PPC::RLDICL, dl, InVT, InputOp,
+                                            S->getI64Imm(0, dl),
+                                            S->getI64Imm(63, dl)), 0);
+    } else if (isLogicOp(OperandOpcode))
+      return computeLogicOpInGPR(Operand);
+    return SDValue();
+  };
+  LHS = getLogicOperand(LogicOp.getOperand(0));
+  RHS = getLogicOperand(LogicOp.getOperand(1));
+
+  // If a GPR sequence can't be produced for the LHS we can't proceed.
+  // Not producing a GPR sequence for the RHS is only a problem if this isn't
+  // a bitwise negation operation.
+  if (!LHS || (!RHS && !IsBitwiseNegation))
+    return SDValue();
+
+  NumLogicOpsOnComparison++;
+
+  // We will use the inputs as 64-bit values.
+  if (LHS.getValueType() == MVT::i32)
+    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
+  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
+    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
+
+  unsigned NewOpc;
+  switch (LogicOp.getOpcode()) {
+  default: llvm_unreachable("Unknown logic operation.");
+  case ISD::AND: NewOpc = PPC::AND8; break;
+  case ISD::OR:  NewOpc = PPC::OR8;  break;
+  case ISD::XOR: NewOpc = PPC::XOR8; break;
+  }
+
+  if (IsBitwiseNegation) {
+    RHS = S->getI64Imm(1, dl);
+    NewOpc = PPC::XORI8;
+  }
+
+  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
+
+}
+
+/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
+/// Otherwise just reinterpret it as a 64-bit value.
+/// Useful when emitting comparison code for 32-bit values without using
+/// the compare instruction (which only considers the lower 32-bits).
+SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
+  assert(Input.getValueType() == MVT::i32 &&
+         "Can only sign-extend 32-bit values here.");
+  unsigned Opc = Input.getOpcode();
+
+  // The value was sign extended and then truncated to 32-bits. No need to
+  // sign extend it again.
+  if (Opc == ISD::TRUNCATE &&
+      (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
+       Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
+    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
+
+  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
+  // The input is a sign-extending load. All ppc sign-extending loads
+  // sign-extend to the full 64-bits.
+  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
+    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
+
+  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
+  // We don't sign-extend constants.
+  if (InputConst)
+    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
+
+  SDLoc dl(Input);
+  SignExtensionsAdded++;
+  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
+                                        MVT::i64, Input), 0);
+}
+
+/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
+/// Otherwise just reinterpret it as a 64-bit value.
+/// Useful when emitting comparison code for 32-bit values without using
+/// the compare instruction (which only considers the lower 32-bits).
+SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
+  assert(Input.getValueType() == MVT::i32 &&
+         "Can only zero-extend 32-bit values here.");
+  unsigned Opc = Input.getOpcode();
+
+  // The only condition under which we can omit the actual extend instruction:
+  // - The value is a positive constant
+  // - The value comes from a load that isn't a sign-extending load
+  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
+  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
+    (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
+     Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
+  if (IsTruncateOfZExt)
+    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
+
+  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
+  if (InputConst && InputConst->getSExtValue() >= 0)
+    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
+
+  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
+  // The input is a load that doesn't sign-extend (it will be zero-extended).
+  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
+    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
+
+  // None of the above, need to zero-extend.
+  SDLoc dl(Input);
+  ZeroExtensionsAdded++;
+  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
+                                        S->getI64Imm(0, dl),
+                                        S->getI64Imm(32, dl)), 0);
+}
+
+// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
+// course not actual zero/sign extensions that will generate machine code,
+// they're just a way to reinterpret a 32 bit value in a register as a
+// 64 bit value and vice-versa.
+SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
+                                                ExtOrTruncConversion Conv) {
+  SDLoc dl(NatWidthRes);
+
+  // For reinterpreting 32-bit values as 64 bit values, we generate
+  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
+  if (Conv == ExtOrTruncConversion::Ext) {
+    SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
+    SDValue SubRegIdx =
+      CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+    return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
+                                          ImDef, NatWidthRes, SubRegIdx), 0);
+  }
+
+  assert(Conv == ExtOrTruncConversion::Trunc &&
+         "Unknown convertion between 32 and 64 bit values.");
+  // For reinterpreting 64-bit values as 32-bit values, we just need to
+  // EXTRACT_SUBREG (i.e. extract the low word).
+  SDValue SubRegIdx =
+    CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
+                                        NatWidthRes, SubRegIdx), 0);
+}
+
+// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
+// Handle both zero-extensions and sign-extensions.
+SDValue
+IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
+                                                         ZeroCompare CmpTy) {
+  EVT InVT = LHS.getValueType();
+  bool Is32Bit = InVT == MVT::i32;
+  SDValue ToExtend;
+
+  // Produce the value that needs to be either zero or sign extended.
+  switch (CmpTy) {
+  case ZeroCompare::GEZExt:
+  case ZeroCompare::GESExt:
+    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
+                                              dl, InVT, LHS, LHS), 0);
+    break;
+  case ZeroCompare::LEZExt:
+  case ZeroCompare::LESExt: {
+    if (Is32Bit) {
+      // Upper 32 bits cannot be undefined for this sequence.
+      LHS = signExtendInputIfNeeded(LHS);
+      SDValue Neg =
+        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
+      ToExtend =
+        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                       Neg, S->getI64Imm(1, dl),
+                                       S->getI64Imm(63, dl)), 0);
+    } else {
+      SDValue Addi =
+        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
+                                       S->getI64Imm(~0ULL, dl)), 0);
+      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
+                                                Addi, LHS), 0);
+    }
+    break;
+  }
+  }
+
+  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
+  if (!Is32Bit &&
+      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
+    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                          ToExtend, S->getI64Imm(1, dl),
+                                          S->getI64Imm(63, dl)), 0);
+  if (!Is32Bit &&
+      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
+    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
+                                          S->getI64Imm(63, dl)), 0);
+
+  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
+  // For 32-bit sequences, the extensions differ between GE/LE cases.
+  switch (CmpTy) {
+  case ZeroCompare::GEZExt: {
+    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
+                           S->getI32Imm(31, dl) };
+    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
+                                          ShiftOps), 0);
+  }
+  case ZeroCompare::GESExt:
+    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
+                                          S->getI32Imm(31, dl)), 0);
+  case ZeroCompare::LEZExt:
+    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
+                                          S->getI32Imm(1, dl)), 0);
+  case ZeroCompare::LESExt:
+    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
+                                          S->getI32Imm(-1, dl)), 0);
+  }
+
+  // The above case covers all the enumerators so it can't have a default clause
+  // to avoid compiler warnings.
+  llvm_unreachable("Unknown zero-comparison type.");
+}
+
+/// Produces a zero-extended result of comparing two 32-bit values according to
+/// the passed condition code.
+SDValue
+IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
+                                              ISD::CondCode CC,
+                                              int64_t RHSValue, SDLoc dl) {
+  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
+      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
+    return SDValue();
+  bool IsRHSZero = RHSValue == 0;
+  bool IsRHSOne = RHSValue == 1;
+  bool IsRHSNegOne = RHSValue == -1LL;
+  switch (CC) {
+  default: return SDValue();
+  case ISD::SETEQ: {
+    // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
+    // (zext (setcc %a, 0, seteq))  -> (lshr (cntlzw %a), 5)
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+    SDValue Clz =
+      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
+      S->getI32Imm(31, dl) };
+    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
+                                          ShiftOps), 0);
+  }
+  case ISD::SETNE: {
+    // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
+    // (zext (setcc %a, 0, setne))  -> (xor (lshr (cntlzw %a), 5), 1)
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+    SDValue Clz =
+      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
+      S->getI32Imm(31, dl) };
+    SDValue Shift =
+      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
+                                          S->getI32Imm(1, dl)), 0);
+  }
+  case ISD::SETGE: {
+    // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
+    // (zext (setcc %a, 0, setge))  -> (lshr (~ %a), 31)
+    if(IsRHSZero)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
+
+    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
+    // by swapping inputs and falling through.
+    std::swap(LHS, RHS);
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    IsRHSZero = RHSConst && RHSConst->isNullValue();
+    LLVM_FALLTHROUGH;
+  }
+  case ISD::SETLE: {
+    if (CmpInGPR == ICGPR_NonExtIn)
+      return SDValue();
+    // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
+    // (zext (setcc %a, 0, setle))  -> (xor (lshr (- %a), 63), 1)
+    if(IsRHSZero) {
+      if (CmpInGPR == ICGPR_NonExtIn)
+        return SDValue();
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
+    }
+
+    // The upper 32-bits of the register can't be undefined for this sequence.
+    LHS = signExtendInputIfNeeded(LHS);
+    RHS = signExtendInputIfNeeded(RHS);
+    SDValue Sub =
+      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue Shift =
+      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
+                                     S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
+              0);
+    return
+      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
+                                     MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
+  }
+  case ISD::SETGT: {
+    // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
+    // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
+    // (zext (setcc %a, 0, setgt))  -> (lshr (- %a), 63)
+    // Handle SETLT -1 (which is equivalent to SETGE 0).
+    if (IsRHSNegOne)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
+
+    if (IsRHSZero) {
+      if (CmpInGPR == ICGPR_NonExtIn)
+        return SDValue();
+      // The upper 32-bits of the register can't be undefined for this sequence.
+      LHS = signExtendInputIfNeeded(LHS);
+      RHS = signExtendInputIfNeeded(RHS);
+      SDValue Neg =
+        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
+      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                     Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
+    }
+    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
+    // (%b < %a) by swapping inputs and falling through.
+    std::swap(LHS, RHS);
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    IsRHSZero = RHSConst && RHSConst->isNullValue();
+    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
+    LLVM_FALLTHROUGH;
+  }
+  case ISD::SETLT: {
+    // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
+    // (zext (setcc %a, 1, setlt))  -> (xor (lshr (- %a), 63), 1)
+    // (zext (setcc %a, 0, setlt))  -> (lshr %a, 31)
+    // Handle SETLT 1 (which is equivalent to SETLE 0).
+    if (IsRHSOne) {
+      if (CmpInGPR == ICGPR_NonExtIn)
+        return SDValue();
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
+    }
+
+    if (IsRHSZero) {
+      SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
+                             S->getI32Imm(31, dl) };
+      return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
+                                            ShiftOps), 0);
+    }
+
+    if (CmpInGPR == ICGPR_NonExtIn)
+      return SDValue();
+    // The upper 32-bits of the register can't be undefined for this sequence.
+    LHS = signExtendInputIfNeeded(LHS);
+    RHS = signExtendInputIfNeeded(RHS);
+    SDValue SUBFNode =
+      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                    SUBFNode, S->getI64Imm(1, dl),
+                                    S->getI64Imm(63, dl)), 0);
+  }
+  case ISD::SETUGE:
+    // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
+    // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
+    std::swap(LHS, RHS);
+    LLVM_FALLTHROUGH;
+  case ISD::SETULE: {
+    if (CmpInGPR == ICGPR_NonExtIn)
+      return SDValue();
+    // The upper 32-bits of the register can't be undefined for this sequence.
+    LHS = zeroExtendInputIfNeeded(LHS);
+    RHS = zeroExtendInputIfNeeded(RHS);
+    SDValue Subtract =
+      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue SrdiNode =
+      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                          Subtract, S->getI64Imm(1, dl),
+                                          S->getI64Imm(63, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
+                                            S->getI32Imm(1, dl)), 0);
+  }
+  case ISD::SETUGT:
+    // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
+    // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
+    std::swap(LHS, RHS);
+    LLVM_FALLTHROUGH;
+  case ISD::SETULT: {
+    if (CmpInGPR == ICGPR_NonExtIn)
+      return SDValue();
+    // The upper 32-bits of the register can't be undefined for this sequence.
+    LHS = zeroExtendInputIfNeeded(LHS);
+    RHS = zeroExtendInputIfNeeded(RHS);
+    SDValue Subtract =
+      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                          Subtract, S->getI64Imm(1, dl),
+                                          S->getI64Imm(63, dl)), 0);
+  }
+  }
+}
+
+/// Produces a sign-extended result of comparing two 32-bit values according to
+/// the passed condition code.
+SDValue
+IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
+                                              ISD::CondCode CC,
+                                              int64_t RHSValue, SDLoc dl) {
+  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
+      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
+    return SDValue();
+  bool IsRHSZero = RHSValue == 0;
+  bool IsRHSOne = RHSValue == 1;
+  bool IsRHSNegOne = RHSValue == -1LL;
+
+  switch (CC) {
+  default: return SDValue();
+  case ISD::SETEQ: {
+    // (sext (setcc %a, %b, seteq)) ->
+    //   (ashr (shl (ctlz (xor %a, %b)), 58), 63)
+    // (sext (setcc %a, 0, seteq)) ->
+    //   (ashr (shl (ctlz %a), 58), 63)
+    SDValue CountInput = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+    SDValue Cntlzw =
+      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
+    SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
+                         S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
+    SDValue Slwi =
+      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
+  }
+  case ISD::SETNE: {
+    // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
+    // flip the bit, finally take 2's complement.
+    // (sext (setcc %a, %b, setne)) ->
+    //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
+    // Same as above, but the first xor is not needed.
+    // (sext (setcc %a, 0, setne)) ->
+    //   (neg (xor (lshr (ctlz %a), 5), 1))
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+    SDValue Clz =
+      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+    SDValue ShiftOps[] =
+      { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
+    SDValue Shift =
+      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
+    SDValue Xori =
+      SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
+                                     S->getI32Imm(1, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
+  }
+  case ISD::SETGE: {
+    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
+    // (sext (setcc %a, 0, setge))  -> (ashr (~ %a), 31)
+    if (IsRHSZero)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
+
+    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
+    // by swapping inputs and falling through.
+    std::swap(LHS, RHS);
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    IsRHSZero = RHSConst && RHSConst->isNullValue();
+    LLVM_FALLTHROUGH;
+  }
+  case ISD::SETLE: {
+    if (CmpInGPR == ICGPR_NonExtIn)
+      return SDValue();
+    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
+    // (sext (setcc %a, 0, setle))  -> (add (lshr (- %a), 63), -1)
+    if (IsRHSZero)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
+
+    // The upper 32-bits of the register can't be undefined for this sequence.
+    LHS = signExtendInputIfNeeded(LHS);
+    RHS = signExtendInputIfNeeded(RHS);
+    SDValue SUBFNode =
+      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
+                                     LHS, RHS), 0);
+    SDValue Srdi =
+      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                     SUBFNode, S->getI64Imm(1, dl),
+                                     S->getI64Imm(63, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
+                                          S->getI32Imm(-1, dl)), 0);
+  }
+  case ISD::SETGT: {
+    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
+    // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
+    // (sext (setcc %a, 0, setgt))  -> (ashr (- %a), 63)
+    if (IsRHSNegOne)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
+    if (IsRHSZero) {
+      if (CmpInGPR == ICGPR_NonExtIn)
+        return SDValue();
+      // The upper 32-bits of the register can't be undefined for this sequence.
+      LHS = signExtendInputIfNeeded(LHS);
+      RHS = signExtendInputIfNeeded(RHS);
+      SDValue Neg =
+        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
+        return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
+                                              S->getI64Imm(63, dl)), 0);
+    }
+    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
+    // (%b < %a) by swapping inputs and falling through.
+    std::swap(LHS, RHS);
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    IsRHSZero = RHSConst && RHSConst->isNullValue();
+    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
+    LLVM_FALLTHROUGH;
+  }
+  case ISD::SETLT: {
+    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
+    // (sext (setcc %a, 1, setgt))  -> (add (lshr (- %a), 63), -1)
+    // (sext (setcc %a, 0, setgt))  -> (ashr %a, 31)
+    if (IsRHSOne) {
+      if (CmpInGPR == ICGPR_NonExtIn)
+        return SDValue();
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
+    }
+    if (IsRHSZero)
+      return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
+                                            S->getI32Imm(31, dl)), 0);
+
+    if (CmpInGPR == ICGPR_NonExtIn)
+      return SDValue();
+    // The upper 32-bits of the register can't be undefined for this sequence.
+    LHS = signExtendInputIfNeeded(LHS);
+    RHS = signExtendInputIfNeeded(RHS);
+    SDValue SUBFNode =
+      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
+                                          SUBFNode, S->getI64Imm(63, dl)), 0);
+  }
+  case ISD::SETUGE:
+    // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
+    // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
+    std::swap(LHS, RHS);
+    LLVM_FALLTHROUGH;
+  case ISD::SETULE: {
+    if (CmpInGPR == ICGPR_NonExtIn)
+      return SDValue();
+    // The upper 32-bits of the register can't be undefined for this sequence.
+    LHS = zeroExtendInputIfNeeded(LHS);
+    RHS = zeroExtendInputIfNeeded(RHS);
+    SDValue Subtract =
+      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue Shift =
+      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
+                                     S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
+              0);
+    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
+                                          S->getI32Imm(-1, dl)), 0);
+  }
+  case ISD::SETUGT:
+    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
+    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
+    std::swap(LHS, RHS);
+    LLVM_FALLTHROUGH;
+  case ISD::SETULT: {
+    if (CmpInGPR == ICGPR_NonExtIn)
+      return SDValue();
+    // The upper 32-bits of the register can't be undefined for this sequence.
+    LHS = zeroExtendInputIfNeeded(LHS);
+    RHS = zeroExtendInputIfNeeded(RHS);
+    SDValue Subtract =
+      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
+                                          Subtract, S->getI64Imm(63, dl)), 0);
+  }
+  }
+}
+
+/// Produces a zero-extended result of comparing two 64-bit values according to
+/// the passed condition code.
+SDValue
+IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
+                                              ISD::CondCode CC,
+                                              int64_t RHSValue, SDLoc dl) {
+  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
+      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
+    return SDValue();
+  bool IsRHSZero = RHSValue == 0;
+  bool IsRHSOne = RHSValue == 1;
+  bool IsRHSNegOne = RHSValue == -1LL;
+  switch (CC) {
+  default: return SDValue();
+  case ISD::SETEQ: {
+    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
+    // (zext (setcc %a, 0, seteq)) ->  (lshr (ctlz %a), 6)
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue Clz =
+      SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
+                                          S->getI64Imm(58, dl),
+                                          S->getI64Imm(63, dl)), 0);
+  }
+  case ISD::SETNE: {
+    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
+    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
+    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
+    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue AC =
+      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
+                                     Xor, S->getI32Imm(~0U, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
+                                          Xor, AC.getValue(1)), 0);
+  }
+  case ISD::SETGE: {
+    // {subc.reg, subc.CA} = (subcarry %a, %b)
+    // (zext (setcc %a, %b, setge)) ->
+    //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
+    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
+    if (IsRHSZero)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
+    std::swap(LHS, RHS);
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    IsRHSZero = RHSConst && RHSConst->isNullValue();
+    LLVM_FALLTHROUGH;
+  }
+  case ISD::SETLE: {
+    // {subc.reg, subc.CA} = (subcarry %b, %a)
+    // (zext (setcc %a, %b, setge)) ->
+    //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
+    // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
+    if (IsRHSZero)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
+    SDValue ShiftL =
+      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
+                                     S->getI64Imm(1, dl),
+                                     S->getI64Imm(63, dl)), 0);
+    SDValue ShiftR =
+      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
+                                     S->getI64Imm(63, dl)), 0);
+    SDValue SubtractCarry =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
+                                     LHS, RHS), 1);
+    return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
+                                          ShiftR, ShiftL, SubtractCarry), 0);
+  }
+  case ISD::SETGT: {
+    // {subc.reg, subc.CA} = (subcarry %b, %a)
+    // (zext (setcc %a, %b, setgt)) ->
+    //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
+    // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
+    if (IsRHSNegOne)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
+    if (IsRHSZero) {
+      SDValue Addi =
+        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
+                                       S->getI64Imm(~0ULL, dl)), 0);
+      SDValue Nor =
+        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
+      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
+                                            S->getI64Imm(1, dl),
+                                            S->getI64Imm(63, dl)), 0);
+    }
+    std::swap(LHS, RHS);
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    IsRHSZero = RHSConst && RHSConst->isNullValue();
+    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
+    LLVM_FALLTHROUGH;
+  }
+  case ISD::SETLT: {
+    // {subc.reg, subc.CA} = (subcarry %a, %b)
+    // (zext (setcc %a, %b, setlt)) ->
+    //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
+    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
+    if (IsRHSOne)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
+    if (IsRHSZero)
+      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
+                                            S->getI64Imm(1, dl),
+                                            S->getI64Imm(63, dl)), 0);
+    SDValue SRADINode =
+      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
+                                     LHS, S->getI64Imm(63, dl)), 0);
+    SDValue SRDINode =
+      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                     RHS, S->getI64Imm(1, dl),
+                                     S->getI64Imm(63, dl)), 0);
+    SDValue SUBFC8Carry =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
+                                     RHS, LHS), 1);
+    SDValue ADDE8Node =
+      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
+                                     SRDINode, SRADINode, SUBFC8Carry), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
+                                          ADDE8Node, S->getI64Imm(1, dl)), 0);
+  }
+  case ISD::SETUGE:
+    // {subc.reg, subc.CA} = (subcarry %a, %b)
+    // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
+    std::swap(LHS, RHS);
+    LLVM_FALLTHROUGH;
+  case ISD::SETULE: {
+    // {subc.reg, subc.CA} = (subcarry %b, %a)
+    // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
+    SDValue SUBFC8Carry =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
+                                     LHS, RHS), 1);
+    SDValue SUBFE8Node =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
+                                     LHS, LHS, SUBFC8Carry), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
+                                          SUBFE8Node, S->getI64Imm(1, dl)), 0);
+  }
+  case ISD::SETUGT:
+    // {subc.reg, subc.CA} = (subcarry %b, %a)
+    // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
+    std::swap(LHS, RHS);
+    LLVM_FALLTHROUGH;
+  case ISD::SETULT: {
+    // {subc.reg, subc.CA} = (subcarry %a, %b)
+    // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
+    SDValue SubtractCarry =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
+                                     RHS, LHS), 1);
+    SDValue ExtSub =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
+                                     LHS, LHS, SubtractCarry), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
+                                          ExtSub), 0);
+  }
+  }
+}
+
+/// Produces a sign-extended result of comparing two 64-bit values according to
+/// the passed condition code.
+SDValue
+IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
+                                              ISD::CondCode CC,
+                                              int64_t RHSValue, SDLoc dl) {
+  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
+      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
+    return SDValue();
+  bool IsRHSZero = RHSValue == 0;
+  bool IsRHSOne = RHSValue == 1;
+  bool IsRHSNegOne = RHSValue == -1LL;
+  switch (CC) {
+  default: return SDValue();
+  case ISD::SETEQ: {
+    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
+    // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
+    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
+    // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
+    SDValue AddInput = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue Addic =
+      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
+                                     AddInput, S->getI32Imm(~0U, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
+                                          Addic, Addic.getValue(1)), 0);
+  }
+  case ISD::SETNE: {
+    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
+    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
+    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
+    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
+    SDValue Xor = IsRHSZero ? LHS :
+      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
+    SDValue SC =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
+                                     Xor, S->getI32Imm(0, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
+                                          SC, SC.getValue(1)), 0);
+  }
+  case ISD::SETGE: {
+    // {subc.reg, subc.CA} = (subcarry %a, %b)
+    // (zext (setcc %a, %b, setge)) ->
+    //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
+    // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
+    if (IsRHSZero)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
+    std::swap(LHS, RHS);
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    IsRHSZero = RHSConst && RHSConst->isNullValue();
+    LLVM_FALLTHROUGH;
+  }
+  case ISD::SETLE: {
+    // {subc.reg, subc.CA} = (subcarry %b, %a)
+    // (zext (setcc %a, %b, setge)) ->
+    //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
+    // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
+    if (IsRHSZero)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
+    SDValue ShiftR =
+      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
+                                     S->getI64Imm(63, dl)), 0);
+    SDValue ShiftL =
+      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
+                                     S->getI64Imm(1, dl),
+                                     S->getI64Imm(63, dl)), 0);
+    SDValue SubtractCarry =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
+                                     LHS, RHS), 1);
+    SDValue Adde =
+      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
+                                     ShiftR, ShiftL, SubtractCarry), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
+  }
+  case ISD::SETGT: {
+    // {subc.reg, subc.CA} = (subcarry %b, %a)
+    // (zext (setcc %a, %b, setgt)) ->
+    //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
+    // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
+    if (IsRHSNegOne)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
+    if (IsRHSZero) {
+      SDValue Add =
+        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
+                                       S->getI64Imm(-1, dl)), 0);
+      SDValue Nor =
+        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
+      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
+                                            S->getI64Imm(63, dl)), 0);
+    }
+    std::swap(LHS, RHS);
+    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+    IsRHSZero = RHSConst && RHSConst->isNullValue();
+    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
+    LLVM_FALLTHROUGH;
+  }
+  case ISD::SETLT: {
+    // {subc.reg, subc.CA} = (subcarry %a, %b)
+    // (zext (setcc %a, %b, setlt)) ->
+    //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
+    // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
+    if (IsRHSOne)
+      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
+    if (IsRHSZero) {
+      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
+                                            S->getI64Imm(63, dl)), 0);
+    }
+    SDValue SRADINode =
+      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
+                                     LHS, S->getI64Imm(63, dl)), 0);
+    SDValue SRDINode =
+      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
+                                     RHS, S->getI64Imm(1, dl),
+                                     S->getI64Imm(63, dl)), 0);
+    SDValue SUBFC8Carry =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
+                                     RHS, LHS), 1);
+    SDValue ADDE8Node =
+      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
+                                     SRDINode, SRADINode, SUBFC8Carry), 0);
+    SDValue XORI8Node =
+      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
+                                     ADDE8Node, S->getI64Imm(1, dl)), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
+                                          XORI8Node), 0);
+  }
+  case ISD::SETUGE:
+    // {subc.reg, subc.CA} = (subcarry %a, %b)
+    // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
+    std::swap(LHS, RHS);
+    LLVM_FALLTHROUGH;
+  case ISD::SETULE: {
+    // {subc.reg, subc.CA} = (subcarry %b, %a)
+    // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
+    SDValue SubtractCarry =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
+                                     LHS, RHS), 1);
+    SDValue ExtSub =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
+                                     LHS, SubtractCarry), 0);
+    return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
+                                          ExtSub, ExtSub), 0);
+  }
+  case ISD::SETUGT:
+    // {subc.reg, subc.CA} = (subcarry %b, %a)
+    // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
+    std::swap(LHS, RHS);
+    LLVM_FALLTHROUGH;
+  case ISD::SETULT: {
+    // {subc.reg, subc.CA} = (subcarry %a, %b)
+    // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
+    SDValue SubCarry =
+      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
+                                     RHS, LHS), 1);
+    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
+                                     LHS, LHS, SubCarry), 0);
+  }
+  }
+}
+
+/// Do all uses of this SDValue need the result in a GPR?
+/// This is meant to be used on values that have type i1 since
+/// it is somewhat meaningless to ask if values of other types
+/// should be kept in GPR's.
+static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
+  assert(Compare.getOpcode() == ISD::SETCC &&
+         "An ISD::SETCC node required here.");
+
+  // For values that have a single use, the caller should obviously already have
+  // checked if that use is an extending use. We check the other uses here.
+  if (Compare.hasOneUse())
+    return true;
+  // We want the value in a GPR if it is being extended, used for a select, or
+  // used in logical operations.
+  for (auto CompareUse : Compare.getNode()->uses())
+    if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
+        CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
+        CompareUse->getOpcode() != ISD::SELECT &&
+        !isLogicOp(CompareUse->getOpcode())) {
+      OmittedForNonExtendUses++;
+      return false;
+    }
+  return true;
+}
+
+/// Returns an equivalent of a SETCC node but with the result the same width as
+/// the inputs. This can nalso be used for SELECT_CC if either the true or false
+/// values is a power of two while the other is zero.
+SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
+                                                SetccInGPROpts ConvOpts) {
+  assert((Compare.getOpcode() == ISD::SETCC ||
+          Compare.getOpcode() == ISD::SELECT_CC) &&
+         "An ISD::SETCC node required here.");
+
+  // Don't convert this comparison to a GPR sequence because there are uses
+  // of the i1 result (i.e. uses that require the result in the CR).
+  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
+    return SDValue();
+
+  SDValue LHS = Compare.getOperand(0);
+  SDValue RHS = Compare.getOperand(1);
+
+  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
+  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
+  ISD::CondCode CC =
+    cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
+  EVT InputVT = LHS.getValueType();
+  if (InputVT != MVT::i32 && InputVT != MVT::i64)
+    return SDValue();
+
+  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
+      ConvOpts == SetccInGPROpts::SExtInvert)
+    CC = ISD::getSetCCInverse(CC, true);
+
+  bool Inputs32Bit = InputVT == MVT::i32;
+
+  SDLoc dl(Compare);
+  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
+  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
+    ConvOpts == SetccInGPROpts::SExtInvert;
+
+  if (IsSext && Inputs32Bit)
+    return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
+  else if (Inputs32Bit)
+    return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
+  else if (IsSext)
+    return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
+  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
+}
+
 } // end anonymous namespace
 
+bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
+  if (N->getValueType(0) != MVT::i32 &&
+      N->getValueType(0) != MVT::i64)
+    return false;
+
+  // This optimization will emit code that assumes 64-bit registers
+  // so we don't want to run it in 32-bit mode. Also don't run it
+  // on functions that are not to be optimized.
+  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
+    return false;
+
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: {
+    IntegerCompareEliminator ICmpElim(CurDAG, this);
+    if (SDNode *New = ICmpElim.Select(N)) {
+      ReplaceNode(N, New);
+      return true;
+    }
+  }
+  }
+  return false;
+}
+
 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
   if (N->getValueType(0) != MVT::i32 &&
       N->getValueType(0) != MVT::i64)
@@ -2578,6 +3810,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
   if (tryBitPermutation(N))
     return;
 
+  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
+  if (tryIntCompareInGPR(N))
+    return;
+
   switch (N->getOpcode()) {
   default: break;
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2017-11-30 13:39:10 +0000
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2017-11-30 13:39:10 +0000
commit	db7e77047c2432742dd1dd26f3cc96ee180c9c34 (patch)
tree	ab15236c0d74712dec35f23b60f5a42b2fa65c26 /llvm/lib
parent	bb791b3dbdc009412c1a27b14b22091b15d3728b (diff)
download	bcm5719-llvm-db7e77047c2432742dd1dd26f3cc96ee180c9c34.tar.gz bcm5719-llvm-db7e77047c2432742dd1dd26f3cc96ee180c9c34.zip