2 files changed, 30 insertions, 81 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f81a3082ed2..eb3f0350846 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26135,56 +26135,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-/// Combine:
-///   (brcond/cmov/setcc .., (cmp (atomic_load_op ..), 0), cc)
-/// to:
-///   (brcond/cmov/setcc .., (LOCKed op ..), cc)
-/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
-/// Note that this is only legal for some op/cc combinations.
-static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode CC,
-                                       SelectionDAG &DAG) {
-  // This combine only operates on CMP-like nodes.
-  if (!(Cmp.getOpcode() == X86ISD::CMP ||
-        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
-    return SDValue();
-
-  SDValue LHS = Cmp.getOperand(0);
-  SDValue RHS = Cmp.getOperand(1);
-
-  if (!LHS.hasOneUse())
-    return SDValue();
-
-  // FIXME: We can do this for XOR/OR/AND as well, but only if they survive
-  // AtomicExpand. Currently, we choose to expand them to cmpxchg if they
-  // have any users. Could we relax that to ignore (icmp x,0) users?
-  switch (LHS->getOpcode()) {
-  case ISD::ATOMIC_LOAD_ADD:
-  case ISD::ATOMIC_LOAD_SUB:
-    break;
-  default:
-    return SDValue();
-  }
-
-  auto *C = dyn_cast<ConstantSDNode>(RHS);
-  if (!C || C->getZExtValue() != 0)
-    return SDValue();
-
-  // Don't do this for all condition codes, as OF/CF are cleared by (CMP x,0)
-  // but might be set by arithmetic. Furthermore, we might later select INC/DEC,
-  // which don't modify CF (though CCs using CF should have been optimized out).
-  // SF/ZF are safe as they are set the same way.
-  // Note that in theory, the transformation is also valid for P/NP.
-  if (CC != X86::COND_E && CC != X86::COND_NE && CC != X86::COND_S &&
-      CC != X86::COND_NS)
-    return SDValue();
-
-  SDValue LockOp = lowerAtomicArithWithLOCK(LHS, DAG);
-  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(0),
-                                DAG.getUNDEF(LHS.getValueType()));
-  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LockOp.getValue(1));
-  return LockOp;
-}
-
 // Check whether a boolean test is testing a boolean value generated by
 // X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
 // code.
@@ -26356,16 +26306,6 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
   return true;
 }
 
-/// Optimize an EFLAGS definition used according to the condition code \p CC
-/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
-/// uses of chain values.
-static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
-                                  SelectionDAG &DAG) {
-  if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
-    return R;
-  return combineSetCCAtomicArith(EFLAGS, CC, DAG);
-}
-
 /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
 static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
@@ -26392,14 +26332,15 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Try to simplify the EFLAGS and condition code operands.
-  // We can't always do this as FCMOV only supports a subset of X86 cond.
-  if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
-    if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
-      SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
-        Flags};
-      return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
-    }
+  SDValue Flags;
+
+  Flags = checkBoolTestSetCCCombine(Cond, CC);
+  if (Flags.getNode() &&
+      // Extra check as FCMOV only supports a subset of X86 cond.
+      (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
+    SDValue Ops[] = { FalseOp, TrueOp,
+                      DAG.getConstant(CC, DL, MVT::i8), Flags };
+    return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
   }
 
   // If this is a select between two integer constants, try to do some
@@ -29325,8 +29266,7 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
   if (CC == X86::COND_B)
     return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
 
-  // Try to simplify the EFLAGS and condition code operands.
-  if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
+  if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
     SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
     return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
   }
@@ -29339,16 +29279,15 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
                              TargetLowering::DAGCombinerInfo &DCI,
                              const X86Subtarget &Subtarget) {
   SDLoc DL(N);
+  SDValue Chain = N->getOperand(0);
+  SDValue Dest = N->getOperand(1);
   SDValue EFLAGS = N->getOperand(3);
   X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
 
-  // Try to simplify the EFLAGS and condition code operands.
-  // Make sure to not keep references to operands, as combineSetCCEFLAGS can
-  // RAUW them under us.
-  if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
+  if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
     SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
-    return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
-                       N->getOperand(1), Cond, Flags);
+    return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
+                       Flags);
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
index 18581b47d11..152d7e0ea71 100644
--- a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -4,7 +4,9 @@
 define i8 @test_add_1_setcc_ne(i64* %p) #0 {
 ; CHECK-LABEL: test_add_1_setcc_ne:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock incq (%rdi)
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    setne %al
 ; CHECK-NEXT:    retq
 entry:
@@ -17,7 +19,9 @@ entry:
 define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
 ; CHECK-LABEL: test_sub_1_setcc_eq:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    movq $-1, %rax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
 entry:
@@ -45,7 +49,9 @@ entry:
 define i8 @test_sub_10_setcc_sge(i64* %p) #0 {
 ; CHECK-LABEL: test_sub_10_setcc_sge:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock addq $-10, (%rdi)
+; CHECK-NEXT:    movq $-10, %rax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    setns %al
 ; CHECK-NEXT:    retq
 entry:
@@ -60,7 +66,9 @@ entry:
 define i32 @test_add_10_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 {
 ; CHECK-LABEL: test_add_10_brcond_sge:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock addq $10, (%rdi)
+; CHECK-NEXT:    movl $10, %eax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    js .LBB4_2
 ; CHECK-NEXT:  # BB#1: # %t
 ; CHECK-NEXT:    movl %esi, %eax
@@ -81,7 +89,9 @@ f:
 define i32 @test_sub_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
 ; CHECK-LABEL: test_sub_1_cmov_slt:
 ; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    lock decq (%rdi)
+; CHECK-NEXT:    movq $-1, %rax
+; CHECK-NEXT:    lock xaddq %rax, (%rdi)
+; CHECK-NEXT:    testq %rax, %rax
 ; CHECK-NEXT:    cmovnsl %edx, %esi
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    retq