11 files changed, 65 insertions, 79 deletions
diff --git a/llvm/include/llvm/Target/TargetInstrInfo.h b/llvm/include/llvm/Target/TargetInstrInfo.h
index 75260878dd2..d19ca124206 100644
--- a/llvm/include/llvm/Target/TargetInstrInfo.h
+++ b/llvm/include/llvm/Target/TargetInstrInfo.h
@@ -51,6 +51,15 @@ public:
     EXTRACT_SUBREG = 4,
     INSERT_SUBREG = 5
   };
+  
+  // Target independent implicit values for use with subreg insert. All
+  // targets that support insert_subreg support IMPL_VAL_UNDEF. Support for
+  // the other values is target dependent.
+  enum ImplictVal {
+    IMPL_VAL_UNDEF = 0,  // Insert into an implicit undefined value.
+    IMPL_VAL_ZERO  = 1,  // Insert into an implicit all-zero value.
+    LAST_IMPL_VAL  = 3   // NOTE(review): skips 2 -- confirm this is intended.
+  };
 
   unsigned getNumOpcodes() const { return NumOpcodes; }
 
@@ -120,7 +129,6 @@ public:
     return false;
   }
   
-  
   /// convertToThreeAddress - This method must be implemented by targets that
   /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
   /// may be able to convert a two-address instruction into one or more true
diff --git a/llvm/lib/CodeGen/LowerSubregs.cpp b/llvm/lib/CodeGen/LowerSubregs.cpp
index 531713e47fb..8945dd57b18 100644
--- a/llvm/lib/CodeGen/LowerSubregs.cpp
+++ b/llvm/lib/CodeGen/LowerSubregs.cpp
@@ -96,13 +96,10 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
          (MI->getOperand(2).isRegister() && MI->getOperand(2).isUse()) &&
           MI->getOperand(3).isImmediate() && "Invalid insert_subreg");
           
+  // Check if we're inserting into an implicit value.
+  bool isImplicit = MI->getOperand(1).isImmediate();
   unsigned DstReg = MI->getOperand(0).getReg();
-  unsigned SrcReg = 0;
-  // Check if we're inserting into an implicit value.
-  if (MI->getOperand(1).isImmediate())
-    SrcReg = DstReg;
-  else
-    SrcReg = MI->getOperand(1).getReg();
+  unsigned SrcReg = isImplicit ? DstReg : MI->getOperand(1).getReg();
   unsigned InsReg = MI->getOperand(2).getReg();
   unsigned SubIdx = MI->getOperand(3).getImm();     
 
@@ -118,13 +115,20 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
 
   DOUT << "subreg: CONVERTING: " << *MI;
        
+  // Check whether the implicit subreg copy has side effects. Only copies
+  // into an undef value have no side effects; that is, they can be
+  // eliminated without changing the semantics of the program.
+  bool copyHasSideAffects = isImplicit? 
+                  MI->getOperand(1).getImm() != TargetInstrInfo::IMPL_VAL_UNDEF
+                  : false; 
+       
   // If the inserted register is already allocated into a subregister
   // of the destination, we copy the subreg into the source
   // However, this is only safe if the insert instruction is the kill
   // of the source register
   bool revCopyOrder = TRI.isSubRegister(DstReg, InsReg);
-  if (revCopyOrder && InsReg != DstSubReg) {
-    if (MI->getOperand(1).isKill()) {
+  if (revCopyOrder && (InsReg != DstSubReg || copyHasSideAffects)) {
+    if (isImplicit || MI->getOperand(1).isKill()) {
       DstSubReg = TRI.getSubReg(SrcReg, SubIdx);
       // Insert sub-register copy
       const TargetRegisterClass *TRC1 = 0;
@@ -144,7 +148,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
     }
   }
 #ifndef NDEBUG
-  if (InsReg == DstSubReg) {
+  if (InsReg == DstSubReg && !copyHasSideAffects) {
      DOUT << "subreg: Eliminated subreg copy\n";
   }
 #endif
@@ -174,7 +178,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
   }
 #endif
 
-  if (!revCopyOrder && InsReg != DstSubReg) {
+  if (!revCopyOrder && (InsReg != DstSubReg || copyHasSideAffects)) {
     // Insert sub-register copy
     const TargetRegisterClass *TRC1 = 0;
     if (TargetRegisterInfo::isPhysicalRegister(InsReg)) {
diff --git a/llvm/lib/Target/TargetSelectionDAG.td b/llvm/lib/Target/TargetSelectionDAG.td
index dc0fcb5443e..73280f3f6fa 100644
--- a/llvm/lib/Target/TargetSelectionDAG.td
+++ b/llvm/lib/Target/TargetSelectionDAG.td
@@ -919,5 +919,10 @@ def SDT_dwarf_loc : SDTypeProfile<0, 3,
                       [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
 def dwarf_loc : SDNode<"ISD::DEBUG_LOC", SDT_dwarf_loc,[SDNPHasChain]>;
 
-
+//===----------------------------------------------------------------------===//
+// Implicit value insert subreg support.
+//
+// These should match the enum TargetInstrInfo::ImplictVal.
+def tii_impl_val_undef : PatLeaf<(i32 0)>;
+def tii_impl_val_zero  : PatLeaf<(i32 1)>;
 
diff --git a/llvm/lib/Target/X86/X86ATTAsmPrinter.cpp b/llvm/lib/Target/X86/X86ATTAsmPrinter.cpp
index 4fef3827e3f..e51e065540e 100644
--- a/llvm/lib/Target/X86/X86ATTAsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86ATTAsmPrinter.cpp
@@ -637,14 +637,6 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
   ++EmittedInsts;
 
-  // See if a truncate instruction can be turned into a nop.
-  switch (MI->getOpcode()) {
-  default: break;
-  case X86::PsMOVZX64rr32:
-    O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
-    break;
-  }
-
   // Call the autogenerated instruction printer routines.
   printInstruction(MI);
 }
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d5601b74eab..bf233bfcbb4 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1533,14 +1533,9 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
       AddToISelQueue(N0);
       if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
         SDOperand SRIdx;
-        SDOperand ImplVal = CurDAG->getTargetConstant(X86::IMPL_VAL_UNDEF, 
-                                                      MVT::i32);
         switch(N0.getValueType()) {
         case MVT::i32:
           SRIdx = CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
-          // x86-64 zero extends 32-bit inserts int 64-bit registers
-          if (Subtarget->is64Bit())
-            ImplVal = CurDAG->getTargetConstant(X86::IMPL_VAL_ZERO, MVT::i32);
           break;
         case MVT::i16:
           SRIdx = CurDAG->getTargetConstant(X86::SUBREG_16BIT, MVT::i32);
@@ -1552,6 +1547,8 @@ SDNode *X86DAGToDAGISel::Select(SDOperand N) {
         default: assert(0 && "Unknown any_extend!");
         }
         if (SRIdx.Val) {
+          SDOperand ImplVal = 
+              CurDAG->getTargetConstant(X86InstrInfo::IMPL_VAL_UNDEF, MVT::i32);
           SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG,
                                                   NVT, ImplVal, N0, SRIdx);
 
diff --git a/llvm/lib/Target/X86/X86Instr64bit.td b/llvm/lib/Target/X86/X86Instr64bit.td
index 3002b2c2108..d5a9f0bc66f 100644
--- a/llvm/lib/Target/X86/X86Instr64bit.td
+++ b/llvm/lib/Target/X86/X86Instr64bit.td
@@ -1089,22 +1089,6 @@ def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src)
 // Alias Instructions
 //===----------------------------------------------------------------------===//
 
-// Zero-extension
-// TODO: Remove this after proper i32 -> i64 zext support.
-def PsMOVZX64rr32: I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
-                     "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
-                     [(set GR64:$dst, (zext GR32:$src))]>;
-def PsMOVZX64rm32: I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
-                     "mov{l}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
-                     [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
-
-/// PsAND64rrFFFFFFFF - r = r & (2^32-1)
-def PsAND64rrFFFFFFFF
-  : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
-  "mov{l}\t{${src:subreg32}, ${dst:subreg32}|${dst:subreg32}, ${src:subreg32}}",
-      [(set GR64:$dst, (and GR64:$src, i64immFFFFFFFF))]>;
-
-
 // Alias instructions that map movr0 to xor. Use xorl instead of xorq; it's
 // equivalent due to implicit zero-extending, and it sometimes has a smaller
 // encoding.
@@ -1220,27 +1204,48 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
 def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)),
           (TEST64rr GR64:$src1, GR64:$src1)>;
 
+
+
+// Zero-extension
+def : Pat<(i64 (zext GR32:$src)), (INSERT_SUBREG tii_impl_val_zero, 
+                                            GR32:$src, x86_subreg_32bit)>;
+
 // zextload bool -> zextload byte
 def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
 
+def : Pat<(zextloadi64i32 addr:$src), (INSERT_SUBREG tii_impl_val_zero, 
+                                        (MOV32rm addr:$src), x86_subreg_32bit)>;
+
 // extload
 def : Pat<(extloadi64i1 addr:$src),  (MOVZX64rm8  addr:$src)>;
 def : Pat<(extloadi64i8 addr:$src),  (MOVZX64rm8  addr:$src)>;
 def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
-def : Pat<(extloadi64i32 addr:$src), (PsMOVZX64rm32 addr:$src)>;
+def : Pat<(extloadi64i32 addr:$src), (INSERT_SUBREG tii_impl_val_undef, 
+                                        (MOV32rm addr:$src), x86_subreg_32bit)>;
 
 // anyext -> zext
 def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8 :$src)>;
 def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16:$src)>;
-def : Pat<(i64 (anyext GR32:$src)), (PsMOVZX64rr32 GR32:$src)>;
+def : Pat<(i64 (anyext GR32:$src)), (INSERT_SUBREG tii_impl_val_undef, 
+                                        GR32:$src, x86_subreg_32bit)>;
+
 def : Pat<(i64 (anyext (loadi8  addr:$src))), (MOVZX64rm8  addr:$src)>;
 def : Pat<(i64 (anyext (loadi16 addr:$src))), (MOVZX64rm16 addr:$src)>;
-def : Pat<(i64 (anyext (loadi32 addr:$src))), (PsMOVZX64rm32 addr:$src)>;
+def : Pat<(i64 (anyext (loadi32 addr:$src))), (INSERT_SUBREG tii_impl_val_undef, 
+                                                (MOV32rm addr:$src), 
+                                                  x86_subreg_32bit)>;
 
 //===----------------------------------------------------------------------===//
 // Some peepholes
 //===----------------------------------------------------------------------===//
 
+
+// r & (2^32-1) ==> mov32 + implicit zext
+def : Pat<(and GR64:$src, i64immFFFFFFFF), 
+          (INSERT_SUBREG tii_impl_val_zero, 
+            (MOV32rr (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)),
+            x86_subreg_32bit)>;
+
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 8497fe3039c..ff666ff3c27 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -392,7 +392,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::PSHUFDri,        X86::PSHUFDmi },
     { X86::PSHUFHWri,       X86::PSHUFHWmi },
     { X86::PSHUFLWri,       X86::PSHUFLWmi },
-    { X86::PsMOVZX64rr32,   X86::PsMOVZX64rm32 },
     { X86::RCPPSr,          X86::RCPPSm },
     { X86::RCPPSr_Int,      X86::RCPPSm_Int },
     { X86::RSQRTPSr,        X86::RSQRTPSm },
@@ -922,8 +921,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       // Build and insert into an implicit UNDEF value. This is OK because
       // well be shifting and then extracting the lower 16-bits. 
       MachineInstr *Ins = 
-       BuildMI(get(X86::INSERT_SUBREG),leaInReg).addImm(X86::IMPL_VAL_UNDEF)
-         .addReg(Src).addImm(X86::SUBREG_16BIT);
+       BuildMI(get(X86::INSERT_SUBREG),leaInReg)
+                    .addImm(X86InstrInfo::IMPL_VAL_UNDEF)
+                    .addReg(Src).addImm(X86::SUBREG_16BIT);
       
       NewMI = BuildMI(get(Opc), leaOutReg)
         .addReg(0).addImm(1 << ShAmt).addReg(leaInReg).addImm(0);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e0d0342ae6d..f4cdb706955 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -45,15 +45,7 @@ namespace X86 {
     COND_S  = 15,
     COND_INVALID
   };
-  
-  // X86 specific implict values used for subregister inserts. 
-  // This can be used to model the fact that x86-64 by default
-  // inserts 32-bit values into 64-bit registers implicitly containing zeros.
-  enum ImplicitVal {
-    IMPL_VAL_UNDEF = 0,
-    IMPL_VAL_ZERO  = 1
-  };
-  
+    
   // Turn condition code into conditional branch opcode.
   unsigned GetCondBranchFromCond(CondCode CC);
   
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 4d03dba32dc..e32fb9c4b53 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -161,10 +161,6 @@ def i32i8imm  : Operand<i32>;
 // Branch targets have OtherVT type.
 def brtarget : Operand<OtherVT>;
 
-// These should match the enum X86::ImplicitVal
-def x86_impl_val_undef : PatLeaf<(i32 0)>;
-def x86_impl_val_zero  : PatLeaf<(i32 1)>;
-
 //===----------------------------------------------------------------------===//
 // X86 Complex Pattern Definitions.
 //
diff --git a/llvm/lib/Target/X86/X86IntelAsmPrinter.cpp b/llvm/lib/Target/X86/X86IntelAsmPrinter.cpp
index 6c46b454562..16d819a3d7c 100644
--- a/llvm/lib/Target/X86/X86IntelAsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86IntelAsmPrinter.cpp
@@ -314,14 +314,6 @@ bool X86IntelAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
   ++EmittedInsts;
 
-  // See if a truncate instruction can be turned into a nop.
-  switch (MI->getOpcode()) {
-  default: break;
-  case X86::PsMOVZX64rr32:
-    O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
-    break;
-  }
-
   // Call the autogenerated instruction printer routines.
   printInstruction(MI);
 }
diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp
index 291a913c379..0c6afab4c99 100644
--- a/llvm/utils/TableGen/DAGISelEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelEmitter.cpp
@@ -975,9 +975,9 @@ public:
         }
       }
 
-      // Generate MemOperandSDNodes nodes for each memory accesses covered by this
-      // pattern.
-      if (isRoot) {
+      // Generate MemOperandSDNode nodes for each memory access covered by
+      // this pattern.
+      if (II.isSimpleLoad | II.mayLoad | II.mayStore) {
         std::vector<std::string>::const_iterator mi, mie;
         for (mi = LSI.begin(), mie = LSI.end(); mi != mie; ++mi) {
           emitCode("SDOperand LSI_" + *mi + " = "
@@ -1880,14 +1880,9 @@ void DAGISelEmitter::EmitInstructionSelector(std::ostream &OS) {
      << "  SDOperand Tmp = CurDAG->getTargetConstant(C, MVT::i32);\n"
      << "  AddToISelQueue(N1);\n"
      << "  SDOperand Ops[] = { N0, N1, Tmp };\n"
-     << "  if (N0.getOpcode() == ISD::UNDEF) {\n"
-     << "    return CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG,\n"
-     << "                                 N.getValueType(), Ops+1, 2);\n"
-     << "  } else {\n"
-     << "    AddToISelQueue(N0);\n"
-     << "    return CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG,\n"
-     << "                                 N.getValueType(), Ops, 3);\n"
-     << "  }\n"
+     << "  AddToISelQueue(N0);\n"
+     << "  return CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG,\n"
+     << "                               N.getValueType(), Ops, 3);\n"
      << "}\n\n";
 
   OS << "// The main instruction selector code.\n"