Optimize atomic lock or that doesn't use the result value.

Next up: xor and and. Part of rdar://8470697 llvm-svn: 131171
author: Eric Christopher <echristo@apple.com> 2011-05-10 23:57:45 +0000
committer: Eric Christopher <echristo@apple.com> 2011-05-10 23:57:45 +0000
commit: 4a34e61e53b4a87365b0352256735e104b84d055 (patch)
tree: df8606e74d7925abff8e81f695c894d072d8c72a /llvm/lib
parent: dfbf9341adc350019740750573ba02185a978c60 (diff)
download: bcm5719-llvm-4a34e61e53b4a87365b0352256735e104b84d055.tar.gz
bcm5719-llvm-4a34e61e53b4a87365b0352256735e104b84d055.zip
2 files changed, 83 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4534e853914..de7aafff4b5 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -189,6 +189,7 @@ namespace {
     SDNode *Select(SDNode *N);
     SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
     SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
+    SDNode *SelectAtomicLoadOr(SDNode *Node, EVT NVT);
 
     bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
     bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
@@ -1329,6 +1330,8 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
   return ResNode;
 }
 
+// FIXME: Figure out some way to unify this with the 'or' and other code
+// below.
 SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
   if (Node->hasAnyUseOfValue(0))
     return 0;
@@ -1479,6 +1482,78 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
   }
 }
 
+SDNode *X86DAGToDAGISel::SelectAtomicLoadOr(SDNode *Node, EVT NVT) {
+  if (Node->hasAnyUseOfValue(0))
+    return 0;
+  
+  // Optimize common patterns for __sync_or_and_fetch  where the result
+  // is not used. This allows us to use the "lock" version of the or
+  // instruction.
+  // FIXME: Same as for 'add' and 'sub'.
+  SDValue Chain = Node->getOperand(0);
+  SDValue Ptr = Node->getOperand(1);
+  SDValue Val = Node->getOperand(2);
+  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
+    return 0;
+
+  bool isCN = false;
+  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
+  if (CN) {
+    isCN = true;
+    Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
+  }
+  
+  unsigned Opc = 0;
+  switch (NVT.getSimpleVT().SimpleTy) {
+    default: return 0;
+    case MVT::i8:
+      if (isCN)
+        Opc = X86::LOCK_OR8mi;
+      else
+        Opc = X86::LOCK_OR8mr;
+      break;
+    case MVT::i16:
+      if (isCN) {
+        if (immSext8(Val.getNode()))
+          Opc = X86::LOCK_OR16mi8;
+        else
+          Opc = X86::LOCK_OR16mi;
+      } else
+        Opc = X86::LOCK_OR16mr;
+      break;
+    case MVT::i32:
+      if (isCN) {
+        if (immSext8(Val.getNode()))
+          Opc = X86::LOCK_OR32mi8;
+        else
+          Opc = X86::LOCK_OR32mi;
+      } else
+        Opc = X86::LOCK_OR32mr;
+      break;
+    case MVT::i64:
+      if (isCN) {
+        if (immSext8(Val.getNode()))
+          Opc = X86::LOCK_OR64mi8;
+        else if (i64immSExt32(Val.getNode()))
+          Opc = X86::LOCK_OR64mi32;
+      } else
+        Opc = X86::LOCK_OR64mr;
+      break;
+  }
+  
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+                                                 dl, NVT), 0);
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
+  SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
+  SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
+  cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+  SDValue RetVals[] = { Undef, Ret };
+  return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+}
+
 /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
 /// any uses which require the SF or OF bits to be accurate.
 static bool HasNoSignedComparisonUses(SDNode *N) {
@@ -1580,6 +1655,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
       return RetVal;
     break;
   }
+  case ISD::ATOMIC_LOAD_OR: {
+    SDNode *RetVal = SelectAtomicLoadOr(Node, NVT);
+    if (RetVal)
+      return RetVal;
+    break;
+  }
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR: {
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 662fddf6c81..31b33ab83a3 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -580,7 +580,7 @@ def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
                      ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
                      !strconcat("lock\n\t", mnemonic, "{b}\t",
-                     "{$src2, $dst|$dst, $src2}"),
+                                "{$src2, $dst|$dst, $src2}"),
                      []>, LOCK;
 
 def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
@@ -629,6 +629,7 @@ def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
 
 defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
 defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, "sub">;
+defm LOCK_OR  : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, "or">;
 
 // Optimized codegen when the non-memory output is not used.
 let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
author	Eric Christopher <echristo@apple.com>	2011-05-10 23:57:45 +0000
committer	Eric Christopher <echristo@apple.com>	2011-05-10 23:57:45 +0000
commit	4a34e61e53b4a87365b0352256735e104b84d055 (patch)
tree	df8606e74d7925abff8e81f695c894d072d8c72a /llvm/lib
parent	dfbf9341adc350019740750573ba02185a978c60 (diff)
download	bcm5719-llvm-4a34e61e53b4a87365b0352256735e104b84d055.tar.gz bcm5719-llvm-4a34e61e53b4a87365b0352256735e104b84d055.zip