Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86FastISel.cpp        |  18
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp    | 114
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp    | 101
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h      |   4
-rw-r--r--  llvm/lib/Target/X86/X86InstrArithmetic.td  |  31
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td    | 108
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td        |   9
-rw-r--r--  llvm/test/CodeGen/X86/sub-with-overflow.ll |   3
8 files changed, 178 insertions, 210 deletions
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index cbfdc4b3b93..9dd3f265254 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -2900,23 +2900,15 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
- bool UseIncDec = false;
- if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
- UseIncDec = true;
-
unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
- BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
- CondOpc = X86::SETOr;
- break;
+ BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
@@ -2938,9 +2930,11 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
};
- if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
+ if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
+ CondOpc == X86::SETOr) {
+ // We can use INC/DEC.
ResultReg = createResultReg(TLI.getRegClassFor(VT));
- bool IsDec = BaseOpc == X86ISD::DEC;
+ bool IsDec = BaseOpc == ISD::SUB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
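
For illustration, a minimal IR input that should exercise the rewritten FastISel path above (a sketch; assumes a subtarget where inc/dec are not slow): a signed add-with-overflow of the constant 1 now reaches INC through the generic ISD::ADD case plus the CI->isOne() check, rather than through a dedicated X86ISD::INC opcode.

; Sketch: the overflow flag of 'x + 1' should select 'incl' + 'seto'.
define i1 @saddo_inc(i32 %x) nounwind {
entry:
  %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 1)
  %ov = extractvalue { i32, i1 } %t, 1
  ret i1 %ov
}

declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
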
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index f6de519a1c8..72439946771 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2327,6 +2327,22 @@ bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
return true;
}
+static bool mayUseCarryFlag(X86::CondCode CC) {
+ switch (CC) {
+ // Comparisons which don't examine the CF flag.
+ case X86::COND_O: case X86::COND_NO:
+ case X86::COND_E: case X86::COND_NE:
+ case X86::COND_S: case X86::COND_NS:
+ case X86::COND_P: case X86::COND_NP:
+ case X86::COND_L: case X86::COND_GE:
+ case X86::COND_G: case X86::COND_LE:
+ return false;
+ // Anything else: assume conservatively.
+ default:
+ return true;
+ }
+}
+
/// Test whether the given node which sets flags has any uses which require the
/// CF flag to be accurate.
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
@@ -2336,36 +2352,49 @@ bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
// Only check things that use the flags.
if (UI.getUse().getResNo() != Flags.getResNo())
continue;
- // Only examine CopyToReg uses that copy to EFLAGS.
- if (UI->getOpcode() != ISD::CopyToReg ||
- cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
- return false;
- // Examine each user of the CopyToReg use.
- for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
- FlagUI != FlagUE; ++FlagUI) {
- // Only examine the Flag result.
- if (FlagUI.getUse().getResNo() != 1)
- continue;
- // Anything unusual: assume conservatively.
- if (!FlagUI->isMachineOpcode())
- return false;
- // Examine the condition code of the user.
- X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
- switch (CC) {
- // Comparisons which don't examine the CF flag.
- case X86::COND_O: case X86::COND_NO:
- case X86::COND_E: case X86::COND_NE:
- case X86::COND_S: case X86::COND_NS:
- case X86::COND_P: case X86::COND_NP:
- case X86::COND_L: case X86::COND_GE:
- case X86::COND_G: case X86::COND_LE:
- continue;
- // Anything else: assume conservatively.
- default:
+ unsigned UIOpc = UI->getOpcode();
+
+ if (UIOpc == ISD::CopyToReg) {
+ // Only examine CopyToReg uses that copy to EFLAGS.
+ if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
+ // Examine each user of the CopyToReg use.
+ for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
+ FlagUI != FlagUE; ++FlagUI) {
+ // Only examine the Flag result.
+ if (FlagUI.getUse().getResNo() != 1)
+ continue;
+ // Anything unusual: assume conservatively.
+ if (!FlagUI->isMachineOpcode())
+ return false;
+ // Examine the condition code of the user.
+ X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());
+
+ if (mayUseCarryFlag(CC))
+ return false;
}
+
+ // This CopyToReg is ok. Move on to the next user.
+ continue;
+ }
+
+ // This might be an unselected node. So look for the pre-isel opcodes that
+ // use flags.
+ unsigned CCOpNo;
+ switch (UIOpc) {
+ default:
+ // Something unusual. Be conservative.
+ return false;
+ case X86ISD::SETCC: CCOpNo = 0; break;
+ case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
+ case X86ISD::CMOV: CCOpNo = 2; break;
+ case X86ISD::BRCOND: CCOpNo = 2; break;
}
+
+ X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
+ if (mayUseCarryFlag(CC))
+ return false;
}
return true;
}
@@ -2521,8 +2550,6 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
switch (Opc) {
default:
return false;
- case X86ISD::INC:
- case X86ISD::DEC:
case X86ISD::SUB:
case X86ISD::SBB:
break;
@@ -2573,20 +2600,27 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
MachineSDNode *Result;
switch (Opc) {
- case X86ISD::INC:
- case X86ISD::DEC: {
- unsigned NewOpc =
- Opc == X86ISD::INC
- ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
- : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
- const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
- Result =
- CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
- break;
- }
case X86ISD::ADD:
- case X86ISD::ADC:
case X86ISD::SUB:
+ // Try to match inc/dec.
+ if (!Subtarget->slowIncDec() ||
+ CurDAG->getMachineFunction().getFunction().optForSize()) {
+ bool IsOne = isOneConstant(StoredVal.getOperand(1));
+ bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
+ // An ADD/SUB of 1/-1 whose carry flag is unused can be selected as inc/dec.
+ if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
+ unsigned NewOpc =
+ ((Opc == X86ISD::ADD) == IsOne)
+ ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
+ : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
+ const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+ Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
+ MVT::Other, Ops);
+ break;
+ }
+ }
+ LLVM_FALLTHROUGH;
+ case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::OR:
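
For illustration, a read-modify-write sequence that the new ADD/SUB case above should fold to a memory-operand inc (a sketch; assumes a subtarget without the slow-incdec attribute). Here no user of EFLAGS needs the carry flag — the flags are dead — so hasNoCarryFlagUses trivially succeeds:

; Sketch: expected to select 'incl (%rdi)' on x86-64 via
; foldLoadStoreIntoMemOperand's ADD-of-1 path.
define void @inc_mem(i32* %p) nounwind {
entry:
  %v = load i32, i32* %p
  %a = add i32 %v, 1
  store i32 %a, i32* %p
  ret void
}
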
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 187b23179ec..7aab1cdc954 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18644,44 +18644,6 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
- case ISD::ADD:
- // We only want to rewrite this as a target-specific node with attached
- // flags if there is a reasonable chance of either using that to do custom
- // instructions selection that can fold some of the memory operands, or if
- // only the flags are used. If there are other uses, leave the node alone
- // and emit a test instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg &&
- UI->getOpcode() != ISD::SETCC &&
- UI->getOpcode() != ISD::STORE)
- goto default_case;
-
- if (auto *C = dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->isOne() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
-
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->isAllOnesValue() &&
- (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
- }
-
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
-
case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
// because a TEST instruction will be better.
@@ -18689,11 +18651,13 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
LLVM_FALLTHROUGH;
+ case ISD::ADD:
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
- // Similar to ISD::ADD above, check if the uses will preclude useful
- // lowering of the target-specific node.
+ // Transform into an x86-specific ALU node with flags if there is a chance of
+ // using an RMW op, or if only the flags are used. Otherwise, leave
+ // the node alone and emit a 'test' instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
@@ -18704,6 +18668,7 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
+ case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
@@ -18714,8 +18679,6 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
case X86ISD::ADD:
case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
@@ -19603,13 +19566,6 @@ getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
- // A subtract of one will be selected as a INC. Note that INC doesn't
- // set CF, so we can't do this for UADDO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::INC;
- Cond = X86::COND_O;
- break;
- }
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
@@ -19618,13 +19574,6 @@ getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
Cond = X86::COND_B;
break;
case ISD::SSUBO:
- // A subtract of one will be selected as a DEC. Note that DEC doesn't
- // set CF, so we can't do this for USUBO.
- if (isOneConstant(RHS)) {
- BaseOp = X86ISD::DEC;
- Cond = X86::COND_O;
- break;
- }
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
@@ -19675,8 +19624,7 @@ static bool isX86LogicalCmp(SDValue Op) {
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
- Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
- Opc == X86ISD::XOR || Opc == X86ISD::AND))
+ Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
return false;
@@ -25511,8 +25459,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
}
static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget,
- bool AllowIncDec = true) {
+ const X86Subtarget &Subtarget) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
@@ -25536,25 +25483,6 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
- if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
- // Convert to inc/dec if they aren't slow or we are optimizing for size.
- if (AllowIncDec && (!Subtarget.slowIncDec() ||
- DAG.getMachineFunction().getFunction().optForSize())) {
- if ((NewOpc == X86ISD::LADD && C->isOne()) ||
- (NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
- (NewOpc == X86ISD::LADD && C->isAllOnesValue()))
- return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
- DAG.getVTList(MVT::i32, MVT::Other),
- {N->getOperand(0), N->getOperand(1)},
- /*MemVT=*/N->getSimpleValueType(0), MMO);
- }
- }
-
return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
@@ -27034,8 +26962,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
- case X86ISD::LINC: return "X86ISD::LINC";
- case X86ISD::LDEC: return "X86ISD::LDEC";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
@@ -27073,8 +26999,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
- case X86ISD::INC: return "X86ISD::INC";
- case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
@@ -34297,16 +34221,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
/*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
AN->getMemOperand());
- // If the comparision uses the CF flag we can't use INC/DEC instructions.
- bool NeedCF = false;
- switch (CC) {
- default: break;
- case X86::COND_A: case X86::COND_AE:
- case X86::COND_B: case X86::COND_BE:
- NeedCF = true;
- break;
- }
- auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
+ auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
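
For illustration, a pattern handled by combineSetCCAtomicArith above: with the CF screening removed from lowerAtomicArithWithLOCK, the decision to use lock inc/dec is deferred to instruction selection, where the carry-flag check is applied by the new PatFrags (a sketch):

; Sketch: the compare of the old value may be rewritten to use only the
; EFLAGS of the atomic op, which isel can then select as 'lock decq'.
define i1 @dec_hits_one(i64* %p) nounwind {
entry:
  %old = atomicrmw sub i64* %p, i64 1 seq_cst
  %r = icmp eq i64 %old, 1
  ret i1 %r
}
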
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 58c0f958d3c..6dd6e9acc9e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -337,7 +337,7 @@ namespace llvm {
// Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL, UMUL,
- INC, DEC, OR, XOR, AND,
+ OR, XOR, AND,
// Bit field extract.
BEXTR,
@@ -568,7 +568,7 @@ namespace llvm {
/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
- LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
+ LADD, LSUB, LOR, LXOR, LAND,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 9b9e75ce9e1..cb5a4e5b5d4 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -422,22 +422,35 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
} // SchedRW
} // CodeSize
+def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86add_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86sub_flag node:$lhs, node:$rhs), [{
+ // Only use DEC if the result is used.
+ return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
@@ -474,16 +487,18 @@ let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize16;
+ [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
+ OpSize16;
def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, OpSize32;
+ [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>,
+ OpSize32;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
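
For contrast, a case the new fragments deliberately reject (a sketch): an unsigned add-with-overflow of 1 consumes the carry flag via COND_B, so X86add_flag_nocf does not match and a plain 'addl $1' is kept instead of 'incl', which preserves CF and would therefore leave it stale.

; Sketch: the 'setb' consumer reads CF, so the INC pattern above
; cannot match and the generic ADD pattern is selected instead.
define i1 @uaddo_keeps_add(i32 %x) nounwind {
entry:
  %t = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 1)
  %ov = extractvalue { i32, i1 } %t, 1
  ret i1 %ov
}

declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
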
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 8a7b90bdcc9..703c517b2f8 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -776,53 +776,64 @@ defm LOCK_OR : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
-multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
- string frag, string mnemonic> {
-let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALURMW] in {
-def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
- !strconcat(mnemonic, "{b}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_8") addr:$dst))]>,
- LOCK;
-def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
- !strconcat(mnemonic, "{w}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_16") addr:$dst))]>,
- OpSize16, LOCK;
-def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
- !strconcat(mnemonic, "{l}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_32") addr:$dst))]>,
- OpSize32, LOCK;
-def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
- !strconcat(mnemonic, "{q}\t$dst"),
- [(set EFLAGS, (!cast<PatFrag>(frag # "_64") addr:$dst))]>,
- LOCK;
-}
-}
+def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_add node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-multiclass unary_atomic_intrin<SDNode atomic_op> {
- def _8 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
- }]>;
- def _16 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
- }]>;
- def _32 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
- }]>;
- def _64 : PatFrag<(ops node:$ptr),
- (atomic_op node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
- }]>;
-}
+def X86lock_sub_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86lock_sub node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 0));
+}]>;
-defm X86lock_inc : unary_atomic_intrin<X86lock_inc>;
-defm X86lock_dec : unary_atomic_intrin<X86lock_dec>;
+let Predicates = [UseIncDec] in {
+ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
+ SchedRW = [WriteALURMW] in {
+ def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
+ "inc{b}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
+ "inc{w}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
+ "inc{l}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
+ "inc{q}\t$dst",
+ [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+
+ def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
+ "dec{b}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>,
+ LOCK;
+ def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
+ "dec{w}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>,
+ OpSize16, LOCK;
+ def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
+ "dec{l}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>,
+ OpSize32, LOCK;
+ def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
+ "dec{q}\t$dst",
+ [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>,
+ LOCK;
+ }
-defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">;
-defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">;
+ // Additional patterns for -1 constant.
+ def : Pat<(X86lock_add addr:$dst, (i8 -1)), (LOCK_DEC8m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
+ def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i8 -1)), (LOCK_INC8m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
+ def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
+}
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
@@ -2018,6 +2029,15 @@ let Predicates = [UseIncDec] in {
def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+
+ def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>;
+ def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>;
+ def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>;
+ def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>;
+ def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>;
+ def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>;
+ def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>;
+ def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>;
}
// or reg/reg.
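
For illustration, IR that should now reach the LOCK_INC/LOCK_DEC definitions and the -1 patterns above (a sketch; assumes the subtarget satisfies the UseIncDec predicate and the EFLAGS result is unused):

; Sketch: expected 'lock incl (%rdi)' through X86lock_add_nocf.
define void @lock_inc(i32* %p) nounwind {
entry:
  %t = atomicrmw add i32* %p, i32 1 seq_cst
  ret void
}

; Sketch: an add of -1 maps to LOCK_DEC32m via the extra Pat above.
define void @lock_dec(i32* %p) nounwind {
entry:
  %t = atomicrmw add i32* %p, i32 -1 seq_cst
  ret void
}
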
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 7a401d4fc35..e53f83baa3c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -253,8 +253,6 @@ def X86umul_flag : SDNode<"X86ISD::UMUL", SDT2ResultBinaryArithWithFlags,
def X86adc_flag : SDNode<"X86ISD::ADC", SDTBinaryArithWithFlagsInOut>;
def X86sbb_flag : SDNode<"X86ISD::SBB", SDTBinaryArithWithFlagsInOut>;
-def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
-def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
@@ -278,13 +276,6 @@ def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
-def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
- SDNPMemOperand]>;
-
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>;
diff --git a/llvm/test/CodeGen/X86/sub-with-overflow.ll b/llvm/test/CodeGen/X86/sub-with-overflow.ll
index 6de0beeabdf..0bcf2d8a565 100644
--- a/llvm/test/CodeGen/X86/sub-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/sub-with-overflow.ll
@@ -83,8 +83,7 @@ declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
define i1 @func3(i32 %x) nounwind {
; CHECK-LABEL: func3:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: decl %eax
+; CHECK-NEXT: cmpl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: seto %al
; CHECK-NEXT: retl
entry: