summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp55
1 files changed, 45 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6fb94700b9f..d85953c43b0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23575,7 +23575,9 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
-static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG) {
+static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ bool AllowIncDec = true) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
@@ -23598,6 +23600,26 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG) {
}
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
+ // Convert to inc/dec if they aren't slow or we are optimizing for size.
+ if (AllowIncDec && (!Subtarget.slowIncDec() ||
+ DAG.getMachineFunction().getFunction()->optForSize())) {
+ if ((NewOpc == X86ISD::LADD && C->isOne()) ||
+ (NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
+ return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::Other),
+ {N->getOperand(0), N->getOperand(1)},
+ /*MemVT=*/N->getSimpleValueType(0), MMO);
+ if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
+ (NewOpc == X86ISD::LADD && C->isAllOnesValue()))
+ return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::Other),
+ {N->getOperand(0), N->getOperand(1)},
+ /*MemVT=*/N->getSimpleValueType(0), MMO);
+ }
+ }
+
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
@@ -23631,7 +23653,7 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
return N;
}
- SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG);
+ SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
// RAUW the chain, but don't worry about the result, as it's unused.
assert(!N->hasAnyUseOfValue(0));
DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1));
@@ -24709,6 +24731,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
+ case X86ISD::LINC: return "X86ISD::LINC";
+ case X86ISD::LDEC: return "X86ISD::LDEC";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
@@ -31008,7 +31032,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
@@ -31068,7 +31093,16 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
/*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
AN->getMemOperand());
- auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG);
+ // If the comparision uses the CF flag we can't use INC/DEC instructions.
+ bool NeedCF = false;
+ switch (CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ NeedCF = true;
+ break;
+ }
+ auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
@@ -31091,7 +31125,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
else
return SDValue();
- SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
+ SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
@@ -31298,14 +31332,15 @@ static SDValue combineCarryThroughADD(SDValue EFLAGS) {
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
/// uses of chain values.
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
if (CC == X86::COND_B)
if (SDValue Flags = combineCarryThroughADD(EFLAGS))
return Flags;
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
- return combineSetCCAtomicArith(EFLAGS, CC, DAG);
+ return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
@@ -31332,7 +31367,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// Try to simplify the EFLAGS and condition code operands.
// We can't always do this as FCMOV only supports a subset of X86 cond.
- if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
+ if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
Flags};
@@ -35478,7 +35513,7 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
SDValue EFLAGS = N->getOperand(1);
// Try to simplify the EFLAGS and condition code operands.
- if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG))
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget))
return getSETCC(CC, Flags, DL, DAG);
return SDValue();
@@ -35494,7 +35529,7 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
// Try to simplify the EFLAGS and condition code operands.
// Make sure to not keep references to operands, as combineSetCCEFLAGS can
// RAUW them under us.
- if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
N->getOperand(1), Cond, Flags);
OpenPOWER on IntegriCloud