[X86] Allow merging of immediates within a basic block for code size savings

First step in preventing immediates that occur more than once within a single basic block from being pulled into their users, in order to prevent unnecessary large instruction encoding .Currently enabled only when optimizing for size. Patch by: zia.ansari@intel.com Differential Revision: http://reviews.llvm.org/D11363 llvm-svn: 244601
author: Michael Kuperstein <michael.m.kuperstein@intel.com> 2015-08-11 14:10:58 +0000
committer: Michael Kuperstein <michael.m.kuperstein@intel.com> 2015-08-11 14:10:58 +0000
commit: 243c073a2ea3453ae888f3472fc1754c91c9a5f2 (patch)
tree: 8ea5f4bafc11d140446bca374d510c3e1e168d51 /llvm/lib/Target/X86
parent: 23d0e83aa3745c51e1c32afa952e5bcec53cd251 (diff)
download: bcm5719-llvm-243c073a2ea3453ae888f3472fc1754c91c9a5f2.tar.gz
bcm5719-llvm-243c073a2ea3453ae888f3472fc1754c91c9a5f2.zip
3 files changed, 117 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 34ea3b78109..d37db7f788b 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -283,6 +283,82 @@ namespace {
         Segment = CurDAG->getRegister(0, MVT::i32);
     }
 
+    // Utility function to determine whether we should avoid selecting
+    // immediate forms of instructions for better code size or not.
+    // At a high level, we'd like to avoid such instructions when
+    // we have similar constants used within the same basic block
+    // that can be kept in a register.
+    //
+    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
+      uint32_t UseCount = 0;
+
+      // Do not want to hoist if we're not optimizing for size.
+      // TODO: We'd like to remove this restriction.
+      // See the comment in X86InstrInfo.td for more info.
+      if (!OptForSize)
+        return false;
+
+      // Walk all the users of the immediate.
+      for (SDNode::use_iterator UI = N->use_begin(),
+           UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
+
+        SDNode *User = *UI;
+
+        // This user is already selected. Count it as a legitimate use and
+        // move on.
+        if (User->isMachineOpcode()) {
+          UseCount++;
+          continue;
+        }
+
+        // We want to count stores of immediates as real uses.
+        if (User->getOpcode() == ISD::STORE &&
+            User->getOperand(1).getNode() == N) {
+          UseCount++;
+          continue;
+        }
+
+        // We don't currently match users that have > 2 operands (except
+        // for stores, which are handled above)
+        // Those instruction won't match in ISEL, for now, and would
+        // be counted incorrectly.
+        // This may change in the future as we add additional instruction
+        // types.
+        if (User->getNumOperands() != 2)
+          continue;
+        
+        // Immediates that are used for offsets as part of stack
+        // manipulation should be left alone. These are typically
+        // used to indicate SP offsets for argument passing and
+        // will get pulled into stores/pushes (implicitly).
+        if (User->getOpcode() == X86ISD::ADD ||
+            User->getOpcode() == ISD::ADD    ||
+            User->getOpcode() == X86ISD::SUB ||
+            User->getOpcode() == ISD::SUB) {
+
+          // Find the other operand of the add/sub.
+          SDValue OtherOp = User->getOperand(0);
+          if (OtherOp.getNode() == N)
+            OtherOp = User->getOperand(1);
+
+          // Don't count if the other operand is SP.
+          RegisterSDNode *RegNode;
+          if (OtherOp->getOpcode() == ISD::CopyFromReg &&
+              (RegNode = dyn_cast_or_null<RegisterSDNode>(
+                 OtherOp->getOperand(1).getNode())))
+            if ((RegNode->getReg() == X86::ESP) ||
+                (RegNode->getReg() == X86::RSP))
+              continue;
+        }
+
+        // ... otherwise, count this and move on.
+        UseCount++;
+      }
+
+      // If we have more than 1 use, then recommend for hoisting.
+      return (UseCount > 1);
+    }
+
     /// getI8Imm - Return a target constant with the specified value, of type
     /// i8.
     inline SDValue getI8Imm(unsigned Imm, SDLoc DL) {
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 5e19ad448fc..64807aebd30 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -615,14 +615,14 @@ class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
 def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
 
 
-def Xi8  : X86TypeInfo<i8 , "b", GR8 , loadi8 , i8mem ,
-                       Imm8 , i8imm ,    imm,          i8imm   , invalid_node,
+def Xi8  : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
+                       Imm8, i8imm, imm8_su, i8imm, invalid_node,
                        0, OpSizeFixed, 0>;
 def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
-                       Imm16, i16imm,    imm,          i16i8imm, i16immSExt8,
+                       Imm16, i16imm, imm16_su, i16i8imm, i16immSExt8_su,
                        1, OpSize16, 0>;
 def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
-                       Imm32, i32imm,    imm,          i32i8imm, i32immSExt8,
+                       Imm32, i32imm, imm32_su, i32i8imm, i32immSExt8_su,
                        1, OpSize32, 0>;
 def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
                        Imm32S, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index d626e078110..17b13e3220d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -873,6 +873,40 @@ def i16immSExt8  : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
 def i32immSExt8  : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
 def i64immSExt8  : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
 
+// If we have multiple users of an immediate, it's much smaller to reuse
+// the register, rather than encode the immediate in every instruction.
+// This has the risk of increasing register pressure from stretched live
+// ranges, however, the immediates should be trivial to rematerialize by
+// the RA in the event of high register pressure.
+// TODO : This is currently enabled for stores and binary ops. There are more
+// cases for which this can be enabled, though this catches the bulk of the
+// issues.
+// TODO2 : This should really also be enabled under O2, but there's currently
+// an issue with RA where we don't pull the constants into their users
+// when we rematerialize them. I'll follow-up on enabling O2 after we fix that
+// issue.
+// TODO3 : This is currently limited to single basic blocks (DAG creation
+// pulls block immediates to the top and merges them if necessary).
+// Eventually, it would be nice to allow ConstantHoisting to merge constants
+// globally for potentially added savings.
+//
+def imm8_su : PatLeaf<(i8 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm16_su : PatLeaf<(i16 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm32_su : PatLeaf<(i32 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
 
 def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
 
@@ -1283,13 +1317,13 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
 let SchedRW = [WriteStore] in {
 def MOV8mi  : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
                    "mov{b}\t{$src, $dst|$dst, $src}",
-                   [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>;
+                   [(store (i8 imm8_su:$src), addr:$dst)], IIC_MOV_MEM>;
 def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
                    "mov{w}\t{$src, $dst|$dst, $src}",
-                   [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16;
+                   [(store (i16 imm16_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16;
 def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
                    "mov{l}\t{$src, $dst|$dst, $src}",
-                   [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32;
+                   [(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32;
 def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                        "mov{q}\t{$src, $dst|$dst, $src}",
                        [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
author	Michael Kuperstein <michael.m.kuperstein@intel.com>	2015-08-11 14:10:58 +0000
committer	Michael Kuperstein <michael.m.kuperstein@intel.com>	2015-08-11 14:10:58 +0000
commit	243c073a2ea3453ae888f3472fc1754c91c9a5f2 (patch)
tree	8ea5f4bafc11d140446bca374d510c3e1e168d51 /llvm/lib/Target/X86
parent	23d0e83aa3745c51e1c32afa952e5bcec53cd251 (diff)
download	bcm5719-llvm-243c073a2ea3453ae888f3472fc1754c91c9a5f2.tar.gz bcm5719-llvm-243c073a2ea3453ae888f3472fc1754c91c9a5f2.zip