| author | Chandler Carruth <chandlerc@gmail.com> | 2017-09-07 23:54:24 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2017-09-07 23:54:24 +0000 |
| commit | 52a31bf268126442540e7c0846f51061e98f4867 (patch) | |
| tree | 5f79a974c6c16160816b6503afd7cb9b0a49bd89 /llvm/lib/Target | |
| parent | d63ac339483a4af48a2a3f445e442a4be1302c3d (diff) | |
[x86] Extend the manual ISel of `add` and `sub` with both RMW memory
operands and used flags to support matching immediate operands.
This is a bit trickier than register operands, and we still want to fall
back on a register operand even for things that appear to be
"immediates" when they won't actually select into the operation's
immediate operand. This also requires us to handle things like selecting
`sub` vs. `add` to minimize the number of bits needed to represent the
immediate, and picking the shortest immediate encoding. In order to do
that, we in turn need to scan the uses of the flags to make sure that CF
isn't consumed, as it will get inverted by the flip.
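To make the shrinking rule concrete, here is a minimal sketch (not the patch itself; the helper name `shouldNegateImmediate` and the `CFIsUsed` parameter are invented for illustration, and it assumes a standalone build against LLVM's ADT headers), using the same `llvm::APInt` queries as the diff below:

```cpp
// A minimal sketch (not the patch itself) of the immediate-shrinking rule:
// negate the constant and flip add<->sub when that yields a strictly shorter
// immediate encoding and no user reads the carry flag.
#include "llvm/ADT/APInt.h"
#include <cstdio>

using llvm::APInt;

// CFIsUsed stands in for the patch's !hasNoCarryFlagUses() query: the flip
// inverts CF, so it is only legal when nothing consumes the carry flag.
static bool shouldNegateImmediate(const APInt &V, unsigned MemBits,
                                  bool CFIsUsed) {
  if (CFIsUsed)
    return false;
  // Shrink into a sign-extended imm8 (the patch skips this for i8 memory
  // operands, where no separate imm8 form exists).
  if (MemBits != 8 && V.getMinSignedBits() > 8 && (-V).getMinSignedBits() <= 8)
    return true;
  // For i64 operands, shrink into the sign-extended imm32 form.
  if (MemBits == 64 && V.getMinSignedBits() > 32 &&
      (-V).getMinSignedBits() <= 32)
    return true;
  return false;
}

int main() {
  // add [mem32], 128 -> sub [mem32], -128: 128 needs 9 signed bits, -128
  // needs only 8, so the flip buys the imm8 encoding.
  std::printf("%d\n", shouldNegateImmediate(APInt(32, 128), 32, false)); // 1
  // The same transform is unsound if anything reads CF.
  std::printf("%d\n", shouldNegateImmediate(APInt(32, 128), 32, true));  // 0
}
```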
The end result seems very nice though, and we're now generating
optimal instruction sequences for these patterns IMO.
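For a concrete picture of what "optimal" means here (a hypothetical example of mine, not taken from the commit's test suite):

```cpp
// Hypothetical example of the pattern this patch targets: a read-modify-write
// add of a constant that only fits an imm8 encoding once negated.
#include <cstdint>

void bump(int32_t *p) {
  *p += 128; // 128 needs an imm32 encoding; -128 fits in an imm8.
}
// Pre-patch lowering (sketch):       Post-patch lowering (sketch):
//   movl (%rdi), %eax                  subl $-128, (%rdi)
//   addl $128, %eax
//   movl %eax, (%rdi)
// One RMW instruction with the short imm8 encoding, legal here because no
// instruction consumes the carry flag.
```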
A follow-up patch will further expand this to other operations with RMW
memory operands. But handling `add` and `sub` is a useful starting point
to flesh out the machinery and make sure interesting and complex cases
can be handled.
Thanks to Craig Topper who provided a few fixes and improvements to this
patch in addition to the review!
Differential Revision: https://reviews.llvm.org/D37139
llvm-svn: 312764
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 157 |
1 file changed, 142 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 2fec38d7c40..17ee5ca2d4b 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1932,6 +1932,73 @@ static bool hasNoSignedComparisonUses(SDNode *N) {
   return true;
 }
 
+/// Test whether the given node which sets flags has any uses which require the
+/// CF flag to be accurate.
+static bool hasNoCarryFlagUses(SDNode *N) {
+  // Examine each user of the node.
+  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
+       ++UI) {
+    // Only check things that use the flags.
+    if (UI.getUse().getResNo() != 1)
+      continue;
+    // Only examine CopyToReg uses.
+    if (UI->getOpcode() != ISD::CopyToReg)
+      return false;
+    // Only examine CopyToReg uses that copy to EFLAGS.
+    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
+      return false;
+    // Examine each user of the CopyToReg use.
+    for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
+         FlagUI != FlagUE; ++FlagUI) {
+      // Only examine the Flag result.
+      if (FlagUI.getUse().getResNo() != 1)
+        continue;
+      // Anything unusual: assume conservatively.
+      if (!FlagUI->isMachineOpcode())
+        return false;
+      // Examine the opcode of the user.
+      switch (FlagUI->getMachineOpcode()) {
+      // Comparisons which don't examine the CF flag.
+      case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr:
+      case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr:
+      case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr:
+      case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1:
+      case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1:
+      case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1:
+      case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr:
+      case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm:
+      case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr:
+      case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm:
+      case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
+      case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm:
+      case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr:
+      case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm:
+      case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr:
+      case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm:
+      case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr:
+      case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm:
+      case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr:
+      case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm:
+      case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr:
+      case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm:
+      case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr:
+      case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm:
+      case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr:
+      case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm:
+      case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr:
+      case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm:
+      case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr:
+      case X86::CMOVG16rm: case X86::CMOVG32rm: case X86::CMOVG64rm:
+        continue;
+      // Anything else: assume conservatively.
+      default:
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 /// Check whether or not the chain ending in StoreNode is suitable for doing
 /// the {load; op; store} to modify transformation.
 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
@@ -2064,8 +2131,8 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
                   Segment))
     return false;
 
-  auto SelectOpcodeForSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
-                                 unsigned Opc8) {
+  auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
+                          unsigned Opc8 = 0) {
    switch (MemVT.getSimpleVT().SimpleTy) {
    case MVT::i64:
      return Opc64;
@@ -2084,11 +2151,10 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
   switch (Opc) {
   case X86ISD::INC:
   case X86ISD::DEC: {
-    unsigned NewOpc = Opc == X86ISD::INC
-                          ? SelectOpcodeForSize(X86::INC64m, X86::INC32m,
-                                                X86::INC16m, X86::INC8m)
-                          : SelectOpcodeForSize(X86::DEC64m, X86::DEC32m,
-                                                X86::DEC16m, X86::DEC8m);
+    unsigned NewOpc =
+        Opc == X86ISD::INC
+            ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
+            : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
     const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
     Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
                                     Ops);
@@ -2096,14 +2162,75 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
     break;
   }
   case X86ISD::ADD:
   case X86ISD::SUB: {
-    unsigned NewOpc = Opc == X86ISD::ADD
-                          ? SelectOpcodeForSize(X86::ADD64mr, X86::ADD32mr,
-                                                X86::ADD16mr, X86::ADD8mr)
-                          : SelectOpcodeForSize(X86::SUB64mr, X86::SUB32mr,
-                                                X86::SUB16mr, X86::SUB8mr);
-    const SDValue Ops[] = {Base, Scale, Index,
-                           Disp, Segment, StoredVal->getOperand(1),
-                           InputChain};
+    auto SelectRegOpcode = [SelectOpcode](unsigned Opc) {
+      switch (Opc) {
+      case X86ISD::ADD:
+        return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr,
+                            X86::ADD8mr);
+      case X86ISD::SUB:
+        return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr,
+                            X86::SUB8mr);
+      default:
+        llvm_unreachable("Invalid opcode!");
+      }
+    };
+    auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
+      switch (Opc) {
+      case X86ISD::ADD:
+        return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8);
+      case X86ISD::SUB:
+        return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8);
+      default:
+        llvm_unreachable("Invalid opcode!");
+      }
+    };
+    auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
+      switch (Opc) {
+      case X86ISD::ADD:
+        return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi,
+                            X86::ADD8mi);
+      case X86ISD::SUB:
+        return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi,
+                            X86::SUB8mi);
+      default:
+        llvm_unreachable("Invalid opcode!");
+      }
+    };
+
+    unsigned NewOpc = SelectRegOpcode(Opc);
+    SDValue Operand = StoredVal->getOperand(1);
+
+    // See if the operand is a constant that we can fold into an immediate
+    // operand.
+    if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) {
+      auto OperandV = OperandC->getAPIntValue();
+
+      // Check if we can shrink the operand enough to fit in an immediate (or
+      // fit into a smaller immediate) by negating it and switching the
+      // operation.
+      if (((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 &&
+            (-OperandV).getMinSignedBits() <= 8) ||
+           (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 &&
+            (-OperandV).getMinSignedBits() <= 32)) &&
+          hasNoCarryFlagUses(StoredVal.getNode())) {
+        OperandV = -OperandV;
+        Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
+      }
+
+      // First try to fit this into an Imm8 operand. If it doesn't fit, then try
+      // the larger immediate operand.
+      if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) {
+        Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
+        NewOpc = SelectImm8Opcode(Opc);
+      } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() &&
+                 (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) {
+        Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
+        NewOpc = SelectImmOpcode(Opc);
+      }
+    }
+
+    const SDValue Ops[] = {Base, Scale, Index, Disp,
+                           Segment, Operand, InputChain};
     Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other,
                                     Ops);
     break;
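A closing note on the safety check: the new `hasNoCarryFlagUses` walks every consumer of EFLAGS and only accepts SETcc, Jcc, and CMOVcc opcodes whose condition ignores CF. A condensed sketch (hypothetical types; the real code above matches concrete machine opcodes) of which x86 condition codes make the flip unsafe:

```cpp
// Condensed sketch (hypothetical; the real check whitelists concrete machine
// opcodes such as SETcc/Jcc/CMOVcc) of which x86 condition codes read CF.
enum class Cond { O, NO, E, NE, S, NS, P, NP, L, GE, LE, G, // ignore CF
                  B, AE, BE, A };                           // read CF

static bool readsCarryFlag(Cond CC) {
  switch (CC) {
  // B/NAE, AE/NB, BE/NA, and A/NBE are the unsigned comparisons; they are
  // defined in terms of CF, so any such user blocks the add<->sub flip.
  case Cond::B: case Cond::AE: case Cond::BE: case Cond::A:
    return true;
  // The overflow, equality, sign, parity, and signed-comparison conditions
  // never consult CF, matching the cases the diff whitelists.
  default:
    return false;
  }
}
```

Anything else (an `adc` or `sbb` user, a `pushf`, or any non-machine node) hits the `default: return false` path in the helper, keeping the transform conservative.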

