| author | Chandler Carruth <chandlerc@gmail.com> | 2017-09-07 23:54:24 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2017-09-07 23:54:24 +0000 |
| commit | 52a31bf268126442540e7c0846f51061e98f4867 (patch) | |
| tree | 5f79a974c6c16160816b6503afd7cb9b0a49bd89 /llvm/lib/Target | |
| parent | d63ac339483a4af48a2a3f445e442a4be1302c3d (diff) | |
[x86] Extend the manual ISel of `add` and `sub` with both RMW memory
operands and used flags to support matching immediate operands.
This is a bit trickier than register operands, and we still want to fall
back on a register operand even for things that appear to be
"immediates" when they won't actually select into the operation's
immediate operand. This also requires us to handle things like selecting
`sub` vs. `add` to minimize the number of bits needed to represent the
immediate, and picking the shortest immediate encoding. In order to do
that, we in turn need to scan the uses of the flags to make sure that CF
isn't consumed, as it will get inverted by the flip.
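To make the shrinking rule concrete, here is a minimal sketch (not the patch itself; the helper name `shouldNegateImmediate` and the `CFIsUsed` parameter are invented for illustration, and it assumes a standalone build against LLVM's ADT headers), using the same `llvm::APInt` queries as the diff below:

```cpp
// A minimal sketch (not the patch itself) of the immediate-shrinking rule:
// negate the constant and flip add<->sub when that yields a strictly shorter
// immediate encoding and no user reads the carry flag.
#include "llvm/ADT/APInt.h"
#include <cstdio>

using llvm::APInt;

// CFIsUsed stands in for the patch's !hasNoCarryFlagUses() query: the flip
// inverts CF, so it is only legal when nothing consumes the carry flag.
static bool shouldNegateImmediate(const APInt &V, unsigned MemBits,
                                  bool CFIsUsed) {
  if (CFIsUsed)
    return false;
  // Shrink into a sign-extended imm8 (the patch skips this for i8 memory
  // operands, where no separate imm8 form exists).
  if (MemBits != 8 && V.getMinSignedBits() > 8 && (-V).getMinSignedBits() <= 8)
    return true;
  // For i64 operands, shrink into the sign-extended imm32 form.
  if (MemBits == 64 && V.getMinSignedBits() > 32 &&
      (-V).getMinSignedBits() <= 32)
    return true;
  return false;
}

int main() {
  // add [mem32], 128 -> sub [mem32], -128: 128 needs 9 signed bits, -128
  // needs only 8, so the flip buys the imm8 encoding.
  std::printf("%d\n", shouldNegateImmediate(APInt(32, 128), 32, false)); // 1
  // The same transform is unsound if anything reads CF.
  std::printf("%d\n", shouldNegateImmediate(APInt(32, 128), 32, true));  // 0
}
```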
The end result seems very nice though, and we're now generating
optimal instruction sequences for these patterns IMO.
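For a concrete picture of what "optimal" means here (a hypothetical example of mine, not taken from the commit's test suite):

```cpp
// Hypothetical example of the pattern this patch targets: a read-modify-write
// add of a constant that only fits an imm8 encoding once negated.
#include <cstdint>

void bump(int32_t *p) {
  *p += 128; // 128 needs an imm32 encoding; -128 fits in an imm8.
}
// Pre-patch lowering (sketch):       Post-patch lowering (sketch):
//   movl (%rdi), %eax                  subl $-128, (%rdi)
//   addl $128, %eax
//   movl %eax, (%rdi)
// One RMW instruction with the short imm8 encoding, legal here because no
// instruction consumes the carry flag.
```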
A follow-up patch will further expand this to other operations with RMW
memory operands. But handling `add` and `sub` is a useful starting point
to flesh out the machinery and make sure interesting and complex cases
can be handled.
Thanks to Craig Topper who provided a few fixes and improvements to this
patch in addition to the review!
Differential Revision: https://reviews.llvm.org/D37139
llvm-svn: 312764
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 157 |
1 file changed, 142 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2fec38d7c40..17ee5ca2d4b 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1932,6 +1932,73 @@ static bool hasNoSignedComparisonUses(SDNode *N) {
   return true;
 }
 
+/// Test whether the given node which sets flags has any uses which require the
+/// CF flag to be accurate.
+static bool hasNoCarryFlagUses(SDNode *N) {
+  // Examine each user of the node.
+  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
+       ++UI) {
+    // Only check things that use the flags.
+    if (UI.getUse().getResNo() != 1)
+      continue;
+    // Only examine CopyToReg uses.
+    if (UI->getOpcode() != ISD::CopyToReg)
+      return false;
+    // Only examine CopyToReg uses that copy to EFLAGS.
+    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
+      return false;
+    // Examine each user of the CopyToReg use.
+    for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
+         FlagUI != FlagUE; ++FlagUI) {
+      // Only examine the Flag result.
+      if (FlagUI.getUse().getResNo() != 1)
+        continue;
+      // Anything unusual: assume conservatively.
+      if (!FlagUI->isMachineOpcode())
+        return false;
+      // Examine the opcode of the user.
+      switch (FlagUI->getMachineOpcode()) {
+      // Comparisons which don't examine the CF flag.
+      case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
+      case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
+      case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
+      case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
+      case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
+      case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
+      case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
+      case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
+      case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
+      case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
+      case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
+      case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
+      case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
+      case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
+      case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
+      case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
+      case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
+      case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
+      case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
+      case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
+      case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
+      case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
+      case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
+      case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
+      case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
+      case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
+      case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
+      case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
+      case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
+      case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
+        continue;
+      // Anything else: assume conservatively.
+      default:
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 /// Check whether or not the chain ending in StoreNode is suitable for doing
 /// the {load; op; store} to modify transformation.
 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
@@ -2064,8 +2131,8 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
                   Segment))
     return false;
 
-  auto SelectOpcodeForSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
-                                 unsigned Opc8) {
+  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
+                          unsigned Opc8 = 0) {
    switch (MemVT.getSimpleVT().SimpleTy) {
    case MVT::i64:
      return Opc64;
@@ -2084,11 +2151,10 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
   switch (Opc) {
   case X86ISD::INC:
   case X86ISD::DEC: {
-    unsigned NewOpc = Opc == X86ISD::INC
-                          ? SelectOpcodeForSize(X86::INC64m, X86::INC32m,
-                                                X86::INC16m, X86::INC8m)
-                          : SelectOpcodeForSize(X86::DEC64m, X86::DEC32m,
-                                                X86::DEC16m, X86::DEC8m);
+    unsigned NewOpc =
+        Opc == X86ISD::INC
+            ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
+            : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
     const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
     Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
                                     Ops);
@@ -2096,14 +2162,75 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
     break;
   }
   case X86ISD::ADD:
   case X86ISD::SUB: {
-    unsigned NewOpc = Opc == X86ISD::ADD
-                          ? SelectOpcodeForSize(X86::ADD64mr, X86::ADD32mr,
-                                                X86::ADD16mr, X86::ADD8mr)
-                          : SelectOpcodeForSize(X86::SUB64mr, X86::SUB32mr,
-                                                X86::SUB16mr, X86::SUB8mr);
-    const SDValue Ops[] = {Base, Scale, Index,
-                           Disp, Segment, StoredVal->getOperand(1),
-                           InputChain};
+    auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
+      switch (Opc) {
+      case X86ISD::ADD:
+        return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
+                            X86::ADD8mr);
+      case X86ISD::SUB:
+        return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
+                            X86::SUB8mr);
+      default:
+        llvm_unreachable("Invalid opcode!");
+      }
+    };
+    auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
+      switch (Opc) {
+      case X86ISD::ADD:
+        return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8);
+      case X86ISD::SUB:
+        return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8);
+      default:
+        llvm_unreachable("Invalid opcode!");
+      }
+    };
+    auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
+      switch (Opc) {
+      case X86ISD::ADD:
+        return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
+                            X86::ADD8mi);
+      case X86ISD::SUB:
+        return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
+                            X86::SUB8mi);
+      default:
+        llvm_unreachable("Invalid opcode!");
+      }
+    };
+
+    unsigned NewOpc = SelectRegOpcode(Opc);
+    SDValue Operand = StoredVal->getOperand(1);
+
+    // See if the operand is a constant that we can fold into an immediate
+    // operand.
+    if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
+      auto OperandV = OperandC->getAPIntValue();
+
+      // Check if we can shrink the operand enough to fit in an immediate (or
+      // fit into a smaller immediate) by negating it and switching the
+      // operation.
+      if (((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
+            (-OperandV).getMinSignedBits() <= 8) ||
+           (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
+            (-OperandV).getMinSignedBits() <= 32)) &&
+          hasNoCarryFlagUses(StoredVal.getNode())) {
+        OperandV = -OperandV;
+        Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
+      }
+
+      // First try to fit this into an Imm8 operand. If it doesn't fit, then try
+      // the larger immediate operand.
+      if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
+        Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
+        NewOpc = SelectImm8Opcode(Opc);
+      } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
+                 (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
+        Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
+        NewOpc = SelectImmOpcode(Opc);
+      }
+    }
+
+    const SDValue Ops[] = {Base, Scale, Index, Disp,
+                           Segment, Operand, InputChain};
     Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
                                     Ops);
     break;
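A closing note on the safety check: the new `hasNoCarryFlagUses` walks every consumer of EFLAGS and only accepts SETcc, Jcc, and CMOVcc opcodes whose condition ignores CF. A condensed sketch (hypothetical types; the real code above matches concrete machine opcodes) of which x86 condition codes make the flip unsafe:

```cpp
// Condensed sketch (hypothetical; the real check whitelists concrete machine
// opcodes such as SETcc/Jcc/CMOVcc) of which x86 condition codes read CF.
enum class Cond { O, NO, E, NE, S, NS, P, NP, L, GE, LE, G, // ignore CF
                  B, AE, BE, A };                           // read CF

static bool readsCarryFlag(Cond CC) {
  switch (CC) {
  // B/NAE, AE/NB, BE/NA, and A/NBE are the unsigned comparisons; they are
  // defined in terms of CF, so any such user blocks the add<->sub flip.
  case Cond::B: case Cond::AE: case Cond::BE: case Cond::A:
    return true;
  // The overflow, equality, sign, parity, and signed-comparison conditions
  // never consult CF, matching the cases the diff whitelists.
  default:
    return false;
  }
}
```

Anything else (an `adc` or `sbb` user, a `pushf`, or any non-machine node) hits the `default: return false` path in the helper, keeping the transform conservative.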

