summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp91
-rw-r--r--llvm/test/CodeGen/X86/atomic-eflags-reuse.ll20
2 files changed, 81 insertions, 30 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1c488ae0dda..ea5518ed4c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26134,6 +26134,56 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Combine:
+/// (brcond/cmov/setcc .., (cmp (atomic_load_op ..), 0), cc)
+/// to:
+/// (brcond/cmov/setcc .., (LOCKed op ..), cc)
+/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
+/// Note that this is only legal for some op/cc combinations.
+static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode CC,
+ SelectionDAG &DAG) {
+ // This combine only operates on CMP-like nodes.
+ if (!(Cmp.getOpcode() == X86ISD::CMP ||
+ (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
+ return SDValue();
+
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+
+ if (!LHS.hasOneUse())
+ return SDValue();
+
+ // FIXME: We can do this for XOR/OR/AND as well, but only if they survive
+ // AtomicExpand. Currently, we choose to expand them to cmpxchg if they
+ // have any users. Could we relax that to ignore (icmp x,0) users?
+ switch (LHS->getOpcode()) {
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ break;
+ default:
+ return SDValue();
+ }
+
+ auto *C = dyn_cast<ConstantSDNode>(RHS);
+ if (!C || C->getZExtValue() != 0)
+ return SDValue();
+
+ // Don't do this for all condition codes, as OF/CF are cleared by (CMP x,0)
+ // but might be set by arithmetic. Furthermore, we might later select INC/DEC,
+ // which don't modify CF (though CCs using CF should have been optimized out).
+ // SF/ZF are safe as they are set the same way.
+ // Note that in theory, the transformation is also valid for P/NP.
+ if (CC != X86::COND_E && CC != X86::COND_NE && CC != X86::COND_S &&
+ CC != X86::COND_NS)
+ return SDValue();
+
+ SDValue LockOp = lowerAtomicArithWithLOCK(LHS, DAG);
+ DAG.ReplaceAllUsesOfValueWith(LHS.getValue(0),
+ DAG.getUNDEF(LHS.getValueType()));
+ DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LockOp.getValue(1));
+ return LockOp;
+}
+
// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
@@ -26305,6 +26355,16 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
return true;
}
+/// Optimize an EFLAGS definition used according to the condition code \p CC
+/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
+/// uses of chain values.
+static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
+ SelectionDAG &DAG) {
+ if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
+ return R;
+ return combineSetCCAtomicArith(EFLAGS, CC, DAG);
+}
+
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -26331,15 +26391,14 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
}
- SDValue Flags;
-
- Flags = checkBoolTestSetCCCombine(Cond, CC);
- if (Flags.getNode() &&
- // Extra check as FCMOV only supports a subset of X86 cond.
- (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
- SDValue Ops[] = { FalseOp, TrueOp,
- DAG.getConstant(CC, DL, MVT::i8), Flags };
- return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+ // Try to simplify the EFLAGS and condition code operands.
+ // We can't always do this as FCMOV only supports a subset of X86 cond.
+ if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
+ if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
+ SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
+ Flags};
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
+ }
}
// If this is a select between two integer constants, try to do some
@@ -29265,7 +29324,8 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
if (CC == X86::COND_B)
return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
- if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
+ // Try to simplify the EFLAGS and condition code operands.
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
@@ -29278,15 +29338,16 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
- SDValue Chain = N->getOperand(0);
- SDValue Dest = N->getOperand(1);
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));
- if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
+ // Try to simplify the EFLAGS and condition code operands.
+ // Make sure to not keep references to operands, as combineSetCCEFLAGS can
+ // RAUW them under us.
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
- return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
- Flags);
+ return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
+ N->getOperand(1), Cond, Flags);
}
return SDValue();
diff --git a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
index 152d7e0ea71..18581b47d11 100644
--- a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
+++ b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
@@ -4,9 +4,7 @@
define i8 @test_add_1_setcc_ne(i64* %p) #0 {
; CHECK-LABEL: test_add_1_setcc_ne:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: lock incq (%rdi)
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
entry:
@@ -19,9 +17,7 @@ entry:
define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_setcc_eq:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
entry:
@@ -49,9 +45,7 @@ entry:
define i8 @test_sub_10_setcc_sge(i64* %p) #0 {
; CHECK-LABEL: test_sub_10_setcc_sge:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movq $-10, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: lock addq $-10, (%rdi)
; CHECK-NEXT: setns %al
; CHECK-NEXT: retq
entry:
@@ -66,9 +60,7 @@ entry:
define i32 @test_add_10_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_10_brcond_sge:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movl $10, %eax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: lock addq $10, (%rdi)
; CHECK-NEXT: js .LBB4_2
; CHECK-NEXT: # BB#1: # %t
; CHECK-NEXT: movl %esi, %eax
@@ -89,9 +81,7 @@ f:
define i32 @test_sub_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_sub_1_cmov_slt:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: movq $-1, %rax
-; CHECK-NEXT: lock xaddq %rax, (%rdi)
-; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: cmovnsl %edx, %esi
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
OpenPOWER on IntegriCloud