diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 91 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/atomic-eflags-reuse.ll | 20 |
2 files changed, 30 insertions, 81 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f81a3082ed2..eb3f0350846 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26135,56 +26135,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// Combine: -/// (brcond/cmov/setcc .., (cmp (atomic_load_op ..), 0), cc) -/// to: -/// (brcond/cmov/setcc .., (LOCKed op ..), cc) -/// i.e., reusing the EFLAGS produced by the LOCKed instruction. -/// Note that this is only legal for some op/cc combinations. -static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode CC, - SelectionDAG &DAG) { - // This combine only operates on CMP-like nodes. - if (!(Cmp.getOpcode() == X86ISD::CMP || - (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0)))) - return SDValue(); - - SDValue LHS = Cmp.getOperand(0); - SDValue RHS = Cmp.getOperand(1); - - if (!LHS.hasOneUse()) - return SDValue(); - - // FIXME: We can do this for XOR/OR/AND as well, but only if they survive - // AtomicExpand. Currently, we choose to expand them to cmpxchg if they - // have any users. Could we relax that to ignore (icmp x,0) users? - switch (LHS->getOpcode()) { - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - break; - default: - return SDValue(); - } - - auto *C = dyn_cast<ConstantSDNode>(RHS); - if (!C || C->getZExtValue() != 0) - return SDValue(); - - // Don't do this for all condition codes, as OF/CF are cleared by (CMP x,0) - // but might be set by arithmetic. Furthermore, we might later select INC/DEC, - // which don't modify CF (though CCs using CF should have been optimized out). - // SF/ZF are safe as they are set the same way. - // Note that in theory, the transformation is also valid for P/NP. - if (CC != X86::COND_E && CC != X86::COND_NE && CC != X86::COND_S && - CC != X86::COND_NS) - return SDValue(); - - SDValue LockOp = lowerAtomicArithWithLOCK(LHS, DAG); - DAG.ReplaceAllUsesOfValueWith(LHS.getValue(0), - DAG.getUNDEF(LHS.getValueType())); - DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LockOp.getValue(1)); - return LockOp; -} - // Check whether a boolean test is testing a boolean value generated by // X86ISD::SETCC. If so, return the operand of that SETCC and proper condition // code. @@ -26356,16 +26306,6 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, return true; } -/// Optimize an EFLAGS definition used according to the condition code \p CC -/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing -/// uses of chain values. -static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC, - SelectionDAG &DAG) { - if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC)) - return R; - return combineSetCCAtomicArith(EFLAGS, CC, DAG); -} - /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -26392,14 +26332,15 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, } } - // Try to simplify the EFLAGS and condition code operands. - // We can't always do this as FCMOV only supports a subset of X86 cond. - if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) { - if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) { - SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8), - Flags}; - return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); - } + SDValue Flags; + + Flags = checkBoolTestSetCCCombine(Cond, CC); + if (Flags.getNode() && + // Extra check as FCMOV only supports a subset of X86 cond. + (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) { + SDValue Ops[] = { FalseOp, TrueOp, + DAG.getConstant(CC, DL, MVT::i8), Flags }; + return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); } // If this is a select between two integer constants, try to do some @@ -29325,8 +29266,7 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, if (CC == X86::COND_B) return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0)); - // Try to simplify the EFLAGS and condition code operands. - if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) { + if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) { SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags); } @@ -29339,16 +29279,15 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDLoc DL(N); + SDValue Chain = N->getOperand(0); + SDValue Dest = N->getOperand(1); SDValue EFLAGS = N->getOperand(3); X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); - // Try to simplify the EFLAGS and condition code operands. - // Make sure to not keep references to operands, as combineSetCCEFLAGS can - // RAUW them under us. - if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) { + if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) { SDValue Cond = DAG.getConstant(CC, DL, MVT::i8); - return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0), - N->getOperand(1), Cond, Flags); + return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond, + Flags); } return SDValue(); diff --git a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll index 18581b47d11..152d7e0ea71 100644 --- a/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll +++ b/llvm/test/CodeGen/X86/atomic-eflags-reuse.ll @@ -4,7 +4,9 @@ define i8 @test_add_1_setcc_ne(i64* %p) #0 { ; CHECK-LABEL: test_add_1_setcc_ne: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock incq (%rdi) +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: lock xaddq %rax, (%rdi) +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: setne %al ; CHECK-NEXT: retq entry: @@ -17,7 +19,9 @@ entry: define i8 @test_sub_1_setcc_eq(i64* %p) #0 { ; CHECK-LABEL: test_sub_1_setcc_eq: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock decq (%rdi) +; CHECK-NEXT: movq $-1, %rax +; CHECK-NEXT: lock xaddq %rax, (%rdi) +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: sete %al ; CHECK-NEXT: retq entry: @@ -45,7 +49,9 @@ entry: define i8 @test_sub_10_setcc_sge(i64* %p) #0 { ; CHECK-LABEL: test_sub_10_setcc_sge: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock addq $-10, (%rdi) +; CHECK-NEXT: movq $-10, %rax +; CHECK-NEXT: lock xaddq %rax, (%rdi) +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: setns %al ; CHECK-NEXT: retq entry: @@ -60,7 +66,9 @@ entry: define i32 @test_add_10_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_add_10_brcond_sge: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock addq $10, (%rdi) +; CHECK-NEXT: movl $10, %eax +; CHECK-NEXT: lock xaddq %rax, (%rdi) +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: js .LBB4_2 ; CHECK-NEXT: # BB#1: # %t ; CHECK-NEXT: movl %esi, %eax @@ -81,7 +89,9 @@ f: define i32 @test_sub_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: test_sub_1_cmov_slt: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: lock decq (%rdi) +; CHECK-NEXT: movq $-1, %rax +; CHECK-NEXT: lock xaddq %rax, (%rdi) +; CHECK-NEXT: testq %rax, %rax ; CHECK-NEXT: cmovnsl %edx, %esi ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: retq |