diff options
| author | Michael Kuperstein <michael.m.kuperstein@intel.com> | 2015-08-11 14:10:58 +0000 |
|---|---|---|
| committer | Michael Kuperstein <michael.m.kuperstein@intel.com> | 2015-08-11 14:10:58 +0000 |
| commit | 243c073a2ea3453ae888f3472fc1754c91c9a5f2 (patch) | |
| tree | 8ea5f4bafc11d140446bca374d510c3e1e168d51 /llvm/lib/Target/X86 | |
| parent | 23d0e83aa3745c51e1c32afa952e5bcec53cd251 (diff) | |
| download | bcm5719-llvm-243c073a2ea3453ae888f3472fc1754c91c9a5f2.tar.gz bcm5719-llvm-243c073a2ea3453ae888f3472fc1754c91c9a5f2.zip | |
[X86] Allow merging of immediates within a basic block for code size savings
First step in preventing immediates that occur more than once within a single
basic block from being pulled into their users, in order to prevent unnecessary
unnecessarily large instruction encodings. Currently enabled only when optimizing for size.
Patch by: zia.ansari@intel.com
Differential Revision: http://reviews.llvm.org/D11363
llvm-svn: 244601
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 76 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrArithmetic.td | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 40 |
3 files changed, 117 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 34ea3b78109..d37db7f788b 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -283,6 +283,82 @@ namespace { Segment = CurDAG->getRegister(0, MVT::i32); } + // Utility function to determine whether we should avoid selecting + // immediate forms of instructions for better code size or not. + // At a high level, we'd like to avoid such instructions when + // we have similar constants used within the same basic block + // that can be kept in a register. + // + bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { + uint32_t UseCount = 0; + + // Do not want to hoist if we're not optimizing for size. + // TODO: We'd like to remove this restriction. + // See the comment in X86InstrInfo.td for more info. + if (!OptForSize) + return false; + + // Walk all the users of the immediate. + for (SDNode::use_iterator UI = N->use_begin(), + UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) { + + SDNode *User = *UI; + + // This user is already selected. Count it as a legitimate use and + // move on. + if (User->isMachineOpcode()) { + UseCount++; + continue; + } + + // We want to count stores of immediates as real uses. + if (User->getOpcode() == ISD::STORE && + User->getOperand(1).getNode() == N) { + UseCount++; + continue; + } + + // We don't currently match users that have > 2 operands (except + // for stores, which are handled above) + // Those instruction won't match in ISEL, for now, and would + // be counted incorrectly. + // This may change in the future as we add additional instruction + // types. + if (User->getNumOperands() != 2) + continue; + + // Immediates that are used for offsets as part of stack + // manipulation should be left alone. These are typically + // used to indicate SP offsets for argument passing and + // will get pulled into stores/pushes (implicitly). 
+ if (User->getOpcode() == X86ISD::ADD || + User->getOpcode() == ISD::ADD || + User->getOpcode() == X86ISD::SUB || + User->getOpcode() == ISD::SUB) { + + // Find the other operand of the add/sub. + SDValue OtherOp = User->getOperand(0); + if (OtherOp.getNode() == N) + OtherOp = User->getOperand(1); + + // Don't count if the other operand is SP. + RegisterSDNode *RegNode; + if (OtherOp->getOpcode() == ISD::CopyFromReg && + (RegNode = dyn_cast_or_null<RegisterSDNode>( + OtherOp->getOperand(1).getNode()))) + if ((RegNode->getReg() == X86::ESP) || + (RegNode->getReg() == X86::RSP)) + continue; + } + + // ... otherwise, count this and move on. + UseCount++; + } + + // If we have more than 1 use, then recommend for hoisting. + return (UseCount > 1); + } + /// getI8Imm - Return a target constant with the specified value, of type /// i8. inline SDValue getI8Imm(unsigned Imm, SDLoc DL) { diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 5e19ad448fc..64807aebd30 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -615,14 +615,14 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass, def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">; -def Xi8 : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem , - Imm8 , i8imm , imm, i8imm , invalid_node, +def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem, + Imm8, i8imm, imm8_su, i8imm, invalid_node, 0, OpSizeFixed, 0>; def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem, - Imm16, i16imm, imm, i16i8imm, i16immSExt8, + Imm16, i16imm, imm16_su, i16i8imm, i16immSExt8_su, 1, OpSize16, 0>; def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem, - Imm32, i32imm, imm, i32i8imm, i32immSExt8, + Imm32, i32imm, imm32_su, i32i8imm, i32immSExt8_su, 1, OpSize32, 0>; def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, Imm32S, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8, diff --git 
a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index d626e078110..17b13e3220d 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -873,6 +873,40 @@ def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>; def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>; def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>; +// If we have multiple users of an immediate, it's much smaller to reuse +// the register, rather than encode the immediate in every instruction. +// This has the risk of increasing register pressure from stretched live +// ranges, however, the immediates should be trivial to rematerialize by +// the RA in the event of high register pressure. +// TODO : This is currently enabled for stores and binary ops. There are more +// cases for which this can be enabled, though this catches the bulk of the +// issues. +// TODO2 : This should really also be enabled under O2, but there's currently +// an issue with RA where we don't pull the constants into their users +// when we rematerialize them. I'll follow-up on enabling O2 after we fix that +// issue. +// TODO3 : This is currently limited to single basic blocks (DAG creation +// pulls block immediates to the top and merges them if necessary). +// Eventually, it would be nice to allow ConstantHoisting to merge constants +// globally for potentially added savings. 
+// +def imm8_su : PatLeaf<(i8 imm), [{ + return !shouldAvoidImmediateInstFormsForSize(N); +}]>; +def imm16_su : PatLeaf<(i16 imm), [{ + return !shouldAvoidImmediateInstFormsForSize(N); +}]>; +def imm32_su : PatLeaf<(i32 imm), [{ + return !shouldAvoidImmediateInstFormsForSize(N); +}]>; + +def i16immSExt8_su : PatLeaf<(i16immSExt8), [{ + return !shouldAvoidImmediateInstFormsForSize(N); +}]>; +def i32immSExt8_su : PatLeaf<(i32immSExt8), [{ + return !shouldAvoidImmediateInstFormsForSize(N); +}]>; + def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>; @@ -1283,13 +1317,13 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", - [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; + [(store (i8 imm8_su:$src), addr:$dst)], IIC_MOV_MEM>; def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), "mov{w}\t{$src, $dst|$dst, $src}", - [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16; + [(store (i16 imm16_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16; def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", - [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32; + [(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32; def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; |

