Diffstat (limited to 'llvm')
21 files changed, 4452 insertions, 3808 deletions
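For orientation, here is a minimal sketch (not part of the patch; `clonePartialSuccessors` is a hypothetical helper name) of how the new `MachineBasicBlock::copySuccessor` hook introduced below is meant to be used when cloning only some of a block's successors, mirroring what the pass's block-splitting code does:

#include "llvm/CodeGen/MachineBasicBlock.h"

// Clone every successor of MBB except Skip into NewMBB, carrying over branch
// probabilities when MBB has them, then renormalize the now-partial set.
static void clonePartialSuccessors(llvm::MachineBasicBlock &MBB,
                                   llvm::MachineBasicBlock &NewMBB,
                                   llvm::MachineBasicBlock *Skip) {
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (*SI != Skip)
      NewMBB.copySuccessor(&MBB, SI);
  // A partial copy generally leaves probabilities that no longer sum to one.
  NewMBB.normalizeSuccProbs();
}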
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 8dc83442c30..f3130b6e128 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -457,6 +457,13 @@ public:
   /// Replace successor OLD with NEW and update probability info.
   void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New);
 
+  /// Copy a successor (and any probability info) from the original block to
+  /// this block. Uses an iterator into the original block's successors.
+  ///
+  /// This is useful when doing a partial clone of successors. Afterward, the
+  /// probabilities may need to be normalized.
+  void copySuccessor(MachineBasicBlock *Orig, succ_iterator I);
+
   /// Transfers all the successors from MBB to this machine basic block (i.e.,
   /// copies all the successors FromMBB and remove all the successors from
   /// FromMBB).
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index f8fad028aa9..168a4513b96 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -720,6 +720,14 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
   removeSuccessor(OldI);
 }
 
+void MachineBasicBlock::copySuccessor(MachineBasicBlock *Orig,
+                                      succ_iterator I) {
+  if (!Orig->Probs.empty())
+    addSuccessor(*I, Orig->getSuccProbability(I));
+  else
+    addSuccessorWithoutProb(*I);
+}
+
 void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
   Predecessors.push_back(Pred);
 }
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index a4be362452f..d4bfe510547 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -34,6 +34,7 @@ set(sources
   X86FixupLEAs.cpp
   X86AvoidStoreForwardingBlocks.cpp
   X86FixupSetCC.cpp
+  X86FlagsCopyLowering.cpp
   X86FloatingPoint.cpp
   X86FrameLowering.cpp
   X86InstructionSelector.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 90eac737eaa..4a46e36e47b 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -78,6 +78,9 @@ FunctionPass *createX86FixupSetCC();
 
 /// Return a pass that avoids creating store forward block issues in the hardware.
 FunctionPass *createX86AvoidStoreForwardingBlocks();
 
+/// Return a pass that lowers EFLAGS copy pseudo instructions.
+FunctionPass *createX86FlagsCopyLoweringPass();
+
 /// Return a pass that expands WinAlloca pseudo-instructions.
 FunctionPass *createX86WinAllocaExpander();
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
new file mode 100644
index 00000000000..1f4bd7fd501
--- /dev/null
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -0,0 +1,734 @@
+//====- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
+/// flag bits.
+///
+/// We have to do this by carefully analyzing and rewriting the usage of the
+/// copied EFLAGS register because there is no general way to rematerialize the
+/// entire EFLAGS register safely and efficiently.
Using `popf` both forces +/// dynamic stack adjustment and can create correctness issues due to IF, TF, +/// and other non-status flags being overwritten. Using sequences involving +/// SAHF don't work on all x86 processors and are often quite slow compared to +/// directly testing a single status preserved in its own GPR. +/// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrBuilder.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> + +using namespace llvm; + +#define PASS_KEY "x86-flags-copy-lowering" +#define DEBUG_TYPE PASS_KEY + +STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated"); +STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted"); +STATISTIC(NumTestsInserted, "Number of test instructions inserted"); +STATISTIC(NumAddsInserted, "Number of adds instructions inserted"); + +namespace llvm { + +void initializeX86FlagsCopyLoweringPassPass(PassRegistry &); + +} // end namespace llvm + +namespace { + +// Convenient array type for storing registers associated with each condition. +using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>; + +class X86FlagsCopyLoweringPass : public MachineFunctionPass { +public: +  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) { +    initializeX86FlagsCopyLoweringPassPass(*PassRegistry::getPassRegistry()); +  } + +  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; } +  bool runOnMachineFunction(MachineFunction &MF) override; +  void getAnalysisUsage(AnalysisUsage &AU) const override; + +  /// Pass identification, replacement for typeid. 
+  static char ID; + +private: +  MachineRegisterInfo *MRI; +  const X86InstrInfo *TII; +  const TargetRegisterInfo *TRI; +  const TargetRegisterClass *PromoteRC; + +  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB, +                                  MachineInstr &CopyDefI); + +  unsigned promoteCondToReg(MachineBasicBlock &MBB, +                            MachineBasicBlock::iterator TestPos, +                            DebugLoc TestLoc, X86::CondCode Cond); +  std::pair<unsigned, bool> +  getCondOrInverseInReg(MachineBasicBlock &TestMBB, +                        MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, +                        X86::CondCode Cond, CondRegArray &CondRegs); +  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos, +                  DebugLoc Loc, unsigned Reg); + +  void rewriteArithmetic(MachineBasicBlock &TestMBB, +                         MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, +                         MachineInstr &MI, MachineOperand &FlagUse, +                         CondRegArray &CondRegs); +  void rewriteCMov(MachineBasicBlock &TestMBB, +                   MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, +                   MachineInstr &CMovI, MachineOperand &FlagUse, +                   CondRegArray &CondRegs); +  void rewriteCondJmp(MachineBasicBlock &TestMBB, +                      MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, +                      MachineInstr &JmpI, CondRegArray &CondRegs); +  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse, +                   MachineInstr &CopyDefI); +  void rewriteSetCC(MachineBasicBlock &TestMBB, +                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc, +                    MachineInstr &SetCCI, MachineOperand &FlagUse, +                    CondRegArray &CondRegs); +}; + +} // end anonymous namespace + +INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE, +                      "X86 EFLAGS copy lowering", false, false) +INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE, +                    "X86 EFLAGS copy lowering", false, false) + +FunctionPass *llvm::createX86FlagsCopyLoweringPass() { +  return new X86FlagsCopyLoweringPass(); +} + +char X86FlagsCopyLoweringPass::ID = 0; + +void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const { +  MachineFunctionPass::getAnalysisUsage(AU); +} + +namespace { +/// An enumeration of the arithmetic instruction mnemonics which have +/// interesting flag semantics. +/// +/// We can map instruction opcodes into these mnemonics to make it easy to +/// dispatch with specific functionality. 
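+/// For example, ADC, ADCX, SBB, RCL, and RCR all consume CF, while ADOX
+/// consumes OF.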
+enum class FlagArithMnemonic { +  ADC, +  ADCX, +  ADOX, +  RCL, +  RCR, +  SBB, +}; +} // namespace + +static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) { +  switch (Opcode) { +  default: +    report_fatal_error("No support for lowering a copy into EFLAGS when used " +                       "by this instruction!"); + +#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX)                              \ +  case X86::MNEMONIC##8##SUFFIX:                                               \ +  case X86::MNEMONIC##16##SUFFIX:                                              \ +  case X86::MNEMONIC##32##SUFFIX:                                              \ +  case X86::MNEMONIC##64##SUFFIX: + +#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC)                                    \ +  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr)                                        \ +  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV)                                    \ +  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm)                                        \ +  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr)                                        \ +  case X86::MNEMONIC##8ri:                                                     \ +  case X86::MNEMONIC##16ri8:                                                   \ +  case X86::MNEMONIC##32ri8:                                                   \ +  case X86::MNEMONIC##64ri8:                                                   \ +  case X86::MNEMONIC##16ri:                                                    \ +  case X86::MNEMONIC##32ri:                                                    \ +  case X86::MNEMONIC##64ri32:                                                  \ +  case X86::MNEMONIC##8mi:                                                     \ +  case X86::MNEMONIC##16mi8:                                                   \ +  case X86::MNEMONIC##32mi8:                                                   \ +  case X86::MNEMONIC##64mi8:                                                   \ +  case X86::MNEMONIC##16mi:                                                    \ +  case X86::MNEMONIC##32mi:                                                    \ +  case X86::MNEMONIC##64mi32:                                                  \ +  case X86::MNEMONIC##8i8:                                                     \ +  case X86::MNEMONIC##16i16:                                                   \ +  case X86::MNEMONIC##32i32:                                                   \ +  case X86::MNEMONIC##64i32: + +    LLVM_EXPAND_ADC_SBB_INSTR(ADC) +    return FlagArithMnemonic::ADC; + +    LLVM_EXPAND_ADC_SBB_INSTR(SBB) +    return FlagArithMnemonic::SBB; + +#undef LLVM_EXPAND_ADC_SBB_INSTR + +    LLVM_EXPAND_INSTR_SIZES(RCL, rCL) +    LLVM_EXPAND_INSTR_SIZES(RCL, r1) +    LLVM_EXPAND_INSTR_SIZES(RCL, ri) +    return FlagArithMnemonic::RCL; + +    LLVM_EXPAND_INSTR_SIZES(RCR, rCL) +    LLVM_EXPAND_INSTR_SIZES(RCR, r1) +    LLVM_EXPAND_INSTR_SIZES(RCR, ri) +    return FlagArithMnemonic::RCR; + +#undef LLVM_EXPAND_INSTR_SIZES + +  case X86::ADCX32rr: +  case X86::ADCX64rr: +  case X86::ADCX32rm: +  case X86::ADCX64rm: +    return FlagArithMnemonic::ADCX; + +  case X86::ADOX32rr: +  case X86::ADOX64rr: +  case X86::ADOX32rm: +  case X86::ADOX64rm: +    return FlagArithMnemonic::ADOX; +  } +} + +static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB, +                                     MachineInstr &SplitI, +                                     const X86InstrInfo &TII) { +  MachineFunction &MF = 
*MBB.getParent(); + +  assert(SplitI.getParent() == &MBB && +         "Split instruction must be in the split block!"); +  assert(SplitI.isBranch() && +         "Only designed to split a tail of branch instructions!"); +  assert(X86::getCondFromBranchOpc(SplitI.getOpcode()) != X86::COND_INVALID && +         "Must split on an actual jCC instruction!"); + +  // Dig out the previous instruction to the split point. +  MachineInstr &PrevI = *std::prev(SplitI.getIterator()); +  assert(PrevI.isBranch() && "Must split after a branch!"); +  assert(X86::getCondFromBranchOpc(PrevI.getOpcode()) != X86::COND_INVALID && +         "Must split after an actual jCC instruction!"); +  assert(!std::prev(PrevI.getIterator())->isTerminator() && +         "Must only have this one terminator prior to the split!"); + +  // Grab the one successor edge that will stay in `MBB`. +  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB(); + +  // Analyze the original block to see if we are actually splitting an edge +  // into two edges. This can happen when we have multiple conditional jumps to +  // the same successor. +  bool IsEdgeSplit = +      std::any_of(SplitI.getIterator(), MBB.instr_end(), +                  [&](MachineInstr &MI) { +                    assert(MI.isTerminator() && +                           "Should only have spliced terminators!"); +                    return llvm::any_of( +                        MI.operands(), [&](MachineOperand &MOp) { +                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc; +                        }); +                  }) || +      MBB.getFallThrough() == &UnsplitSucc; + +  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock(); + +  // Insert the new block immediately after the current one. Any existing +  // fallthrough will be sunk into this new block anyways. +  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB); + +  // Splice the tail of instructions into the new block. +  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end()); + +  // Copy the necessary succesors (and their probability info) into the new +  // block. +  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) +    if (IsEdgeSplit || *SI != &UnsplitSucc) +      NewMBB.copySuccessor(&MBB, SI); +  // Normalize the probabilities if we didn't end up splitting the edge. +  if (!IsEdgeSplit) +    NewMBB.normalizeSuccProbs(); + +  // Now replace all of the moved successors in the original block with the new +  // block. This will merge their probabilities. +  for (MachineBasicBlock *Succ : NewMBB.successors()) +    if (Succ != &UnsplitSucc) +      MBB.replaceSuccessor(Succ, &NewMBB); + +  // We should always end up replacing at least one successor. +  assert(MBB.isSuccessor(&NewMBB) && +         "Failed to make the new block a successor!"); + +  // Now update all the PHIs. 
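+  // PHI operands come in (value, predecessor block) pairs starting at operand
+  // 1, so the loop below walks them two at a time and retargets any pair whose
+  // predecessor was the original MBB.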
+  for (MachineBasicBlock *Succ : NewMBB.successors()) { +    for (MachineInstr &MI : *Succ) { +      if (!MI.isPHI()) +        break; + +      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps; +           OpIdx += 2) { +        MachineOperand &OpV = MI.getOperand(OpIdx); +        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1); +        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!"); +        if (OpMBB.getMBB() != &MBB) +          continue; + +        // Replace the operand for unsplit successors +        if (!IsEdgeSplit || Succ != &UnsplitSucc) { +          OpMBB.setMBB(&NewMBB); + +          // We have to continue scanning as there may be multiple entries in +          // the PHI. +          continue; +        } + +        // When we have split the edge append a new successor. +        MI.addOperand(MF, OpV); +        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB)); +        break; +      } +    } +  } + +  return NewMBB; +} + +bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) { +  DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName() +               << " **********\n"); + +  auto &Subtarget = MF.getSubtarget<X86Subtarget>(); +  MRI = &MF.getRegInfo(); +  TII = Subtarget.getInstrInfo(); +  TRI = Subtarget.getRegisterInfo(); +  PromoteRC = &X86::GR8RegClass; + +  if (MF.begin() == MF.end()) +    // Nothing to do for a degenerate empty function... +    return false; + +  SmallVector<MachineInstr *, 4> Copies; +  for (MachineBasicBlock &MBB : MF) +    for (MachineInstr &MI : MBB) +      if (MI.getOpcode() == TargetOpcode::COPY && +          MI.getOperand(0).getReg() == X86::EFLAGS) +        Copies.push_back(&MI); + +  for (MachineInstr *CopyI : Copies) { +    MachineBasicBlock &MBB = *CopyI->getParent(); + +    MachineOperand &VOp = CopyI->getOperand(1); +    assert(VOp.isReg() && +           "The input to the copy for EFLAGS should always be a register!"); +    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg()); +    if (CopyDefI.getOpcode() != TargetOpcode::COPY) { +      // FIXME: The big likely candidate here are PHI nodes. We could in theory +      // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard +      // enough that it is probably better to change every other part of LLVM +      // to avoid creating them. The issue is that once we have PHIs we won't +      // know which original EFLAGS value we need to capture with our setCCs +      // below. The end result will be computing a complete set of setCCs that +      // we *might* want, computing them in every place where we copy *out* of +      // EFLAGS and then doing SSA formation on all of them to insert necessary +      // PHI nodes and consume those here. Then hoping that somehow we DCE the +      // unnecessary ones. This DCE seems very unlikely to be successful and so +      // we will almost certainly end up with a glut of dead setCC +      // instructions. Until we have a motivating test case and fail to avoid +      // it by changing other parts of LLVM's lowering, we refuse to handle +      // this complex case here. +      DEBUG(dbgs() << "ERROR: Encountered unexpected def of an eflags copy: "; +            CopyDefI.dump()); +      report_fatal_error( +          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!"); +    } + +    auto Cleanup = make_scope_exit([&] { +      // All uses of the EFLAGS copy are now rewritten, kill the copy into +      // eflags and if dead the copy from. 
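+      // This scope_exit fires at the end of this loop iteration, once every
+      // use of this particular copy has been rewritten.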
+      CopyI->eraseFromParent(); +      if (MRI->use_empty(CopyDefI.getOperand(0).getReg())) +        CopyDefI.eraseFromParent(); +      ++NumCopiesEliminated; +    }); + +    MachineOperand &DOp = CopyI->getOperand(0); +    assert(DOp.isDef() && "Expected register def!"); +    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!"); +    if (DOp.isDead()) +      continue; + +    MachineBasicBlock &TestMBB = *CopyDefI.getParent(); +    auto TestPos = CopyDefI.getIterator(); +    DebugLoc TestLoc = CopyDefI.getDebugLoc(); + +    DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump()); + +    // Scan for usage of newly set EFLAGS so we can rewrite them. We just buffer +    // jumps because their usage is very constrained. +    bool FlagsKilled = false; +    SmallVector<MachineInstr *, 4> JmpIs; + +    // Gather the condition flags that have already been preserved in +    // registers. We do this from scratch each time as we expect there to be +    // very few of them and we expect to not revisit the same copy definition +    // many times. If either of those change sufficiently we could build a map +    // of these up front instead. +    CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI); + +    for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end(); +         MII != MIE;) { +      MachineInstr &MI = *MII++; +      MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS); +      if (!FlagUse) { +        if (MI.findRegisterDefOperand(X86::EFLAGS)) { +          // If EFLAGS are defined, it's as-if they were killed. We can stop +          // scanning here. +          // +          // NB!!! Many instructions only modify some flags. LLVM currently +          // models this as clobbering all flags, but if that ever changes this +          // will need to be carefully updated to handle that more complex +          // logic. +          FlagsKilled = true; +          break; +        } +        continue; +      } + +      DEBUG(dbgs() << "  Rewriting use: "; MI.dump()); + +      // Check the kill flag before we rewrite as that may change it. +      if (FlagUse->isKill()) +        FlagsKilled = true; + +      // Once we encounter a branch, the rest of the instructions must also be +      // branches. We can't rewrite in place here, so we handle them below. +      // +      // Note that we don't have to handle tail calls here, even conditional +      // tail calls, as those are not introduced into the X86 MI until post-RA +      // branch folding or black placement. As a consequence, we get to deal +      // with the simpler formulation of conditional branches followed by tail +      // calls. +      if (X86::getCondFromBranchOpc(MI.getOpcode()) != X86::COND_INVALID) { +        auto JmpIt = MI.getIterator(); +        do { +          JmpIs.push_back(&*JmpIt); +          ++JmpIt; +        } while (JmpIt != MBB.instr_end() && +                 X86::getCondFromBranchOpc(JmpIt->getOpcode()) != +                     X86::COND_INVALID); +        break; +      } + +      // Otherwise we can just rewrite in-place. 
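+      // The remaining users we know how to rewrite directly: a CMOVcc, a
+      // SETcc, another COPY of the flags, or flag-consuming arithmetic such
+      // as ADC/SBB.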
+      if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) { +        rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); +      } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) { +        rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); +      } else if (MI.getOpcode() == TargetOpcode::COPY) { +        rewriteCopy(MI, *FlagUse, CopyDefI); +      } else { +        // We assume that arithmetic instructions that use flags also def them. +        assert(MI.findRegisterDefOperand(X86::EFLAGS) && +               "Expected a def of EFLAGS for this instruction!"); + +        // NB!!! Several arithmetic instructions only *partially* update +        // flags. Theoretically, we could generate MI code sequences that +        // would rely on this fact and observe different flags independently. +        // But currently LLVM models all of these instructions as clobbering +        // all the flags in an undef way. We rely on that to simplify the +        // logic. +        FlagsKilled = true; + +        rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs); +        break; +      } + +      // If this was the last use of the flags, we're done. +      if (FlagsKilled) +        break; +    } + +    // If we didn't find a kill (or equivalent) check that the flags don't +    // live-out of the basic block. Currently we don't support lowering copies +    // of flags that live out in this fashion. +    if (!FlagsKilled && +        llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) { +          return SuccMBB->isLiveIn(X86::EFLAGS); +        })) { +      DEBUG({ +        dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n" +               << "----\n"; +        MBB.dump(); +        dbgs() << "----\n" +               << "ERROR: Cannot lower this EFLAGS copy!\n"; +      }); +      report_fatal_error( +          "Cannot lower EFLAGS copy that lives out of a basic block!"); +    } + +    // Now rewrite the jumps that use the flags. These we handle specially +    // because if there are multiple jumps we'll have to do surgery on the CFG. +    for (MachineInstr *JmpI : JmpIs) { +      // Past the first jump we need to split the blocks apart. +      if (JmpI != JmpIs.front()) +        splitBlock(*JmpI->getParent(), *JmpI, *TII); + +      rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs); +    } + +    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if +    // the copy's def operand is itself a kill. +  } + +#ifndef NDEBUG +  for (MachineBasicBlock &MBB : MF) +    for (MachineInstr &MI : MBB) +      if (MI.getOpcode() == TargetOpcode::COPY && +          (MI.getOperand(0).getReg() == X86::EFLAGS || +           MI.getOperand(1).getReg() == X86::EFLAGS)) { +        DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: "; MI.dump()); +        llvm_unreachable("Unlowered EFLAGS copy!"); +      } +#endif + +  return true; +} + +/// Collect any conditions that have already been set in registers so that we +/// can re-use them rather than adding duplicates. +CondRegArray +X86FlagsCopyLoweringPass::collectCondsInRegs(MachineBasicBlock &MBB, +                                             MachineInstr &CopyDefI) { +  CondRegArray CondRegs = {}; + +  // Scan backwards across the range of instructions with live EFLAGS. 
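+  // Any SETcc in this range that already materialized a condition into a
+  // virtual register gives us that condition for free, so record it for reuse.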
+  for (MachineInstr &MI : llvm::reverse( +           llvm::make_range(MBB.instr_begin(), CopyDefI.getIterator()))) { +    X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode()); +    if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() && +        TRI->isVirtualRegister(MI.getOperand(0).getReg())) +      CondRegs[Cond] = MI.getOperand(0).getReg(); + +    // Stop scanning when we see the first definition of the EFLAGS as prior to +    // this we would potentially capture the wrong flag state. +    if (MI.findRegisterDefOperand(X86::EFLAGS)) +      break; +  } +  return CondRegs; +} + +unsigned X86FlagsCopyLoweringPass::promoteCondToReg( +    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, +    DebugLoc TestLoc, X86::CondCode Cond) { +  unsigned Reg = MRI->createVirtualRegister(PromoteRC); +  auto SetI = BuildMI(TestMBB, TestPos, TestLoc, +                      TII->get(X86::getSETFromCond(Cond)), Reg); +  (void)SetI; +  DEBUG(dbgs() << "    save cond: "; SetI->dump()); +  ++NumSetCCsInserted; +  return Reg; +} + +std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg( +    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, +    DebugLoc TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) { +  unsigned &CondReg = CondRegs[Cond]; +  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)]; +  if (!CondReg && !InvCondReg) +    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); + +  if (CondReg) +    return {CondReg, false}; +  else +    return {InvCondReg, true}; +} + +void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB, +                                          MachineBasicBlock::iterator Pos, +                                          DebugLoc Loc, unsigned Reg) { +  // We emit test instructions as register/immediate test against -1. This +  // allows register allocation to fold a memory operand if needed (that will +  // happen often due to the places this code is emitted). But hopefully will +  // also allow us to select a shorter encoding of `testb %reg, %reg` when that +  // would be equivalent. +  auto TestI = +      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8ri)).addReg(Reg).addImm(-1); +  (void)TestI; +  DEBUG(dbgs() << "    test cond: "; TestI->dump()); +  ++NumTestsInserted; +} + +void X86FlagsCopyLoweringPass::rewriteArithmetic( +    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, +    DebugLoc TestLoc, MachineInstr &MI, MachineOperand &FlagUse, +    CondRegArray &CondRegs) { +  // Arithmetic is either reading CF or OF. Figure out which condition we need +  // to preserve in a register. +  X86::CondCode Cond; + +  // The addend to use to reset CF or OF when added to the flag value. +  int Addend; + +  switch (getMnemonicFromOpcode(MI.getOpcode())) { +  case FlagArithMnemonic::ADC: +  case FlagArithMnemonic::ADCX: +  case FlagArithMnemonic::RCL: +  case FlagArithMnemonic::RCR: +  case FlagArithMnemonic::SBB: +    Cond = X86::COND_B; // CF == 1 +    // Set up an addend that when one is added will need a carry due to not +    // having a higher bit available. +    Addend = 255; +    break; + +  case FlagArithMnemonic::ADOX: +    Cond = X86::COND_O; // OF == 1 +    // Set up an addend that when one is added will turn from positive to +    // negative and thus overflow in the signed domain. +    Addend = 127; +    break; +  } + +  // Now get a register that contains the value of the flag input to the +  // arithmetic. 
We require exactly this flag to simplify the arithmetic +  // required to materialize it back into the flag. +  unsigned &CondReg = CondRegs[Cond]; +  if (!CondReg) +    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); + +  MachineBasicBlock &MBB = *MI.getParent(); + +  // Insert an instruction that will set the flag back to the desired value. +  unsigned TmpReg = MRI->createVirtualRegister(PromoteRC); +  auto AddI = +      BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri)) +          .addDef(TmpReg, RegState::Dead) +          .addReg(CondReg) +          .addImm(Addend); +  (void)AddI; +  DEBUG(dbgs() << "    add cond: "; AddI->dump()); +  ++NumAddsInserted; +  FlagUse.setIsKill(true); +} + +void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB, +                                           MachineBasicBlock::iterator TestPos, +                                           DebugLoc TestLoc, +                                           MachineInstr &CMovI, +                                           MachineOperand &FlagUse, +                                           CondRegArray &CondRegs) { +  // First get the register containing this specific condition. +  X86::CondCode Cond = X86::getCondFromCMovOpc(CMovI.getOpcode()); +  unsigned CondReg; +  bool Inverted; +  std::tie(CondReg, Inverted) = +      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); + +  MachineBasicBlock &MBB = *CMovI.getParent(); + +  // Insert a direct test of the saved register. +  insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg); + +  // Rewrite the CMov to use the !ZF flag from the test (but match register +  // size and memory operand), and then kill its use of the flags afterward. +  auto &CMovRC = *MRI->getRegClass(CMovI.getOperand(0).getReg()); +  CMovI.setDesc(TII->get(X86::getCMovFromCond( +      Inverted ? X86::COND_E : X86::COND_NE, TRI->getRegSizeInBits(CMovRC) / 8, +      !CMovI.memoperands_empty()))); +  FlagUse.setIsKill(true); +  DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump()); +} + +void X86FlagsCopyLoweringPass::rewriteCondJmp( +    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos, +    DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) { +  // First get the register containing this specific condition. +  X86::CondCode Cond = X86::getCondFromBranchOpc(JmpI.getOpcode()); +  unsigned CondReg; +  bool Inverted; +  std::tie(CondReg, Inverted) = +      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs); + +  MachineBasicBlock &JmpMBB = *JmpI.getParent(); + +  // Insert a direct test of the saved register. +  insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg); + +  // Rewrite the jump to use the !ZF flag from the test, and kill its use of +  // flags afterward. +  JmpI.setDesc(TII->get( +      X86::GetCondBranchFromCond(Inverted ? X86::COND_E : X86::COND_NE))); +  const int ImplicitEFLAGSOpIdx = 1; +  JmpI.getOperand(ImplicitEFLAGSOpIdx).setIsKill(true); +  DEBUG(dbgs() << "    fixed jCC: "; JmpI.dump()); +} + +void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI, +                                           MachineOperand &FlagUse, +                                           MachineInstr &CopyDefI) { +  // Just replace this copy with the the original copy def. 
+  MRI->replaceRegWith(MI.getOperand(0).getReg(), +                      CopyDefI.getOperand(0).getReg()); +  MI.eraseFromParent(); +} + +void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB, +                                            MachineBasicBlock::iterator TestPos, +                                            DebugLoc TestLoc, +                                            MachineInstr &SetCCI, +                                            MachineOperand &FlagUse, +                                            CondRegArray &CondRegs) { +  X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode()); +  // Note that we can't usefully rewrite this to the inverse without complex +  // analysis of the users of the setCC. Largely we rely on duplicates which +  // could have been avoided already being avoided here. +  unsigned &CondReg = CondRegs[Cond]; +  if (!CondReg) +    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond); + +  // Rewriting this is trivial: we just replace the register and remove the +  // setcc. +  MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg); +  SetCCI.eraseFromParent(); +} diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b718fb6ee5b..b119a1fb20a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38721,25 +38721,6 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {    }  } -/// This function checks if any of the users of EFLAGS copies the EFLAGS. We -/// know that the code that lowers COPY of EFLAGS has to use the stack, and if -/// we don't adjust the stack we clobber the first frame index. -/// See X86InstrInfo::copyPhysReg. -static bool hasCopyImplyingStackAdjustment(const MachineFunction &MF) { -  const MachineRegisterInfo &MRI = MF.getRegInfo(); -  return any_of(MRI.reg_instructions(X86::EFLAGS), -                [](const MachineInstr &RI) { return RI.isCopy(); }); -} - -void X86TargetLowering::finalizeLowering(MachineFunction &MF) const { -  if (hasCopyImplyingStackAdjustment(MF)) { -    MachineFrameInfo &MFI = MF.getFrameInfo(); -    MFI.setHasCopyImplyingStackAdjustment(true); -  } - -  TargetLoweringBase::finalizeLowering(MF); -} -  SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,                                                    SDValue Value, SDValue Addr,                                                    SelectionDAG &DAG) const { diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 2fdc0c22e39..517ac3a1123 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1121,8 +1121,6 @@ namespace llvm {      bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,                                 unsigned Factor) const override; -    void finalizeLowering(MachineFunction &MF) const override; -      SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,                                      SDValue Addr, SelectionDAG &DAG)                                      const override; diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 94a0bee7e8e..a39d424e69a 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6796,102 +6796,12 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,      return;    } -  bool FromEFLAGS = SrcReg == X86::EFLAGS; -  bool ToEFLAGS = DestReg == X86::EFLAGS; -  int Reg 
= FromEFLAGS ? DestReg : SrcReg; -  bool is32 = X86::GR32RegClass.contains(Reg); -  bool is64 = X86::GR64RegClass.contains(Reg); - -  if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) { -    int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; -    int Push = is64 ? X86::PUSH64r : X86::PUSH32r; -    int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32; -    int Pop = is64 ? X86::POP64r : X86::POP32r; -    int PopF = is64 ? X86::POPF64 : X86::POPF32; -    int AX = is64 ? X86::RAX : X86::EAX; - -    if (!Subtarget.hasLAHFSAHF()) { -      assert(Subtarget.is64Bit() && -             "Not having LAHF/SAHF only happens on 64-bit."); -      // Moving EFLAGS to / from another register requires a push and a pop. -      // Notice that we have to adjust the stack if we don't want to clobber the -      // first frame index. See X86FrameLowering.cpp - usesTheStack. -      if (FromEFLAGS) { -        BuildMI(MBB, MI, DL, get(PushF)); -        BuildMI(MBB, MI, DL, get(Pop), DestReg); -      } -      if (ToEFLAGS) { -        BuildMI(MBB, MI, DL, get(Push)) -            .addReg(SrcReg, getKillRegState(KillSrc)); -        BuildMI(MBB, MI, DL, get(PopF)); -      } -      return; -    } - -    // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is -    // inefficient. Instead: -    //   - Save the overflow flag OF into AL using SETO, and restore it using a -    //     signed 8-bit addition of AL and INT8_MAX. -    //   - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH -    //     using LAHF/SAHF. -    //   - When RAX/EAX is live and isn't the destination register, make sure it -    //     isn't clobbered by PUSH/POP'ing it before and after saving/restoring -    //     the flags. -    // This approach is ~2.25x faster than using PUSHF/POPF. -    // -    // This is still somewhat inefficient because we don't know which flags are -    // actually live inside EFLAGS. Were we able to do a single SETcc instead of -    // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster. -    // -    // PUSHF/POPF is also potentially incorrect because it affects other flags -    // such as TF/IF/DF, which LLVM doesn't model. -    // -    // Notice that we have to adjust the stack if we don't want to clobber the -    // first frame index. -    // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment. - -    const TargetRegisterInfo &TRI = getRegisterInfo(); -    MachineBasicBlock::LivenessQueryResult LQR = -        MBB.computeRegisterLiveness(&TRI, AX, MI); -    // We do not want to save and restore AX if we do not have to. -    // Moreover, if we do so whereas AX is dead, we would need to set -    // an undef flag on the use of AX, otherwise the verifier will -    // complain that we read an undef value. -    // We do not want to change the behavior of the machine verifier -    // as this is usually wrong to read an undef value. -    if (MachineBasicBlock::LQR_Unknown == LQR) { -      LivePhysRegs LPR(TRI); -      LPR.addLiveOuts(MBB); -      MachineBasicBlock::iterator I = MBB.end(); -      while (I != MI) { -        --I; -        LPR.stepBackward(*I); -      } -      // AX contains the top most register in the aliasing hierarchy. -      // It may not be live, but one of its aliases may be. -      for (MCRegAliasIterator AI(AX, &TRI, true); -           AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI) -        LQR = LPR.contains(*AI) ? 
MachineBasicBlock::LQR_Live -                                : MachineBasicBlock::LQR_Dead; -    } -    bool AXDead = (Reg == AX) || (MachineBasicBlock::LQR_Dead == LQR); -    if (!AXDead) -      BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); -    if (FromEFLAGS) { -      BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL); -      BuildMI(MBB, MI, DL, get(X86::LAHF)); -      BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX); -    } -    if (ToEFLAGS) { -      BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc)); -      BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL) -          .addReg(X86::AL) -          .addImm(INT8_MAX); -      BuildMI(MBB, MI, DL, get(X86::SAHF)); -    } -    if (!AXDead) -      BuildMI(MBB, MI, DL, get(Pop), AX); -    return; +  if (SrcReg == X86::EFLAGS || DestReg == X86::EFLAGS) { +    // FIXME: We use a fatal error here because historically LLVM has tried +    // lower some of these physreg copies and we want to ensure we get +    // reasonable bug reports if someone encounters a case no other testing +    // found. This path should be removed after the LLVM 7 release. +    report_fatal_error("Unable to copy EFLAGS physical register!");    }    DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index fe901fc2dd1..87dadd9966c 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -64,6 +64,7 @@ void initializeX86CmovConverterPassPass(PassRegistry &);  void initializeX86ExecutionDomainFixPass(PassRegistry &);  void initializeX86DomainReassignmentPass(PassRegistry &);  void initializeX86AvoidSFBPassPass(PassRegistry &); +void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);  } // end namespace llvm @@ -84,6 +85,7 @@ extern "C" void LLVMInitializeX86Target() {    initializeX86ExecutionDomainFixPass(PR);    initializeX86DomainReassignmentPass(PR);    initializeX86AvoidSFBPassPass(PR); +  initializeX86FlagsCopyLoweringPassPass(PR);  }  static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -456,6 +458,7 @@ void X86PassConfig::addPreRegAlloc() {      addPass(createX86AvoidStoreForwardingBlocks());    } +  addPass(createX86FlagsCopyLoweringPass());    addPass(createX86WinAllocaExpander());  }  void X86PassConfig::addMachineSSAOptimization() { diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll index 606a2d779e9..e8098dd98ee 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/add-scalar.ll @@ -10,16 +10,10 @@ define i64 @test_add_i64(i64 %arg1, i64 %arg2) {  ;  ; X32-LABEL: test_add_i64:  ; X32:       # %bb.0: -; X32-NEXT:    pushl %ebp -; X32-NEXT:    .cfi_def_cfa_offset 8 -; X32-NEXT:    .cfi_offset %ebp, -8 -; X32-NEXT:    movl %esp, %ebp -; X32-NEXT:    .cfi_def_cfa_register %ebp -; X32-NEXT:    movl 16(%ebp), %eax -; X32-NEXT:    movl 20(%ebp), %edx -; X32-NEXT:    addl 8(%ebp), %eax -; X32-NEXT:    adcl 12(%ebp), %edx -; X32-NEXT:    popl %ebp +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    adcl {{[0-9]+}}(%esp), %edx  ; X32-NEXT:    retl    %ret = add i64 %arg1, %arg2    ret i64 %ret diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 6dab866a751..b79fdef2ff1 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ 
b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -37,6 +37,7 @@  ; CHECK-NEXT:       X86 PIC Global Base Reg Initialization  ; CHECK-NEXT:       Expand ISel Pseudo-instructions  ; CHECK-NEXT:       Local Stack Slot Allocation +; CHECK-NEXT:       X86 EFLAGS copy lowering  ; CHECK-NEXT:       X86 WinAlloca Expander  ; CHECK-NEXT:       Eliminate PHI nodes for register allocation  ; CHECK-NEXT:       Two-Address instruction pass diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll index 9c69628091b..01deba7eed1 100644 --- a/llvm/test/CodeGen/X86/O3-pipeline.ll +++ b/llvm/test/CodeGen/X86/O3-pipeline.ll @@ -90,6 +90,7 @@  ; CHECK-NEXT:       X86 LEA Optimize  ; CHECK-NEXT:       X86 Optimize Call Frame  ; CHECK-NEXT:       X86 Avoid Store Forwarding Block +; CHECK-NEXT:       X86 EFLAGS copy lowering  ; CHECK-NEXT:       X86 WinAlloca Expander  ; CHECK-NEXT:       Detect Dead Lanes  ; CHECK-NEXT:       Process Implicit Definitions diff --git a/llvm/test/CodeGen/X86/clobber-fi0.ll b/llvm/test/CodeGen/X86/clobber-fi0.ll deleted file mode 100644 index b69b1853160..00000000000 --- a/llvm/test/CodeGen/X86/clobber-fi0.ll +++ /dev/null @@ -1,37 +0,0 @@ -; RUN: llc < %s -verify-machineinstrs -mcpu=generic -mtriple=x86_64-linux | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-apple-macosx10.7.0" - -; In the code below we need to copy the EFLAGS because of scheduling constraints. -; When copying the EFLAGS we need to write to the stack with push/pop. This forces -; us to emit the prolog. - -; CHECK: main -; CHECK: subq{{.*}}rsp -; CHECK: ret -define i32 @main(i32 %arg, i8** %arg1) nounwind { -bb: -  %tmp = alloca i32, align 4                      ; [#uses=3 type=i32*] -  %tmp2 = alloca i32, align 4                     ; [#uses=3 type=i32*] -  %tmp3 = alloca i32                              ; [#uses=1 type=i32*] -  store volatile i32 1, i32* %tmp, align 4 -  store volatile i32 1, i32* %tmp2, align 4 -  br label %bb4 - -bb4:                                              ; preds = %bb4, %bb -  %tmp6 = load volatile i32, i32* %tmp2, align 4                ; [#uses=1 type=i32] -  %tmp7 = add i32 %tmp6, -1                       ; [#uses=2 type=i32] -  store volatile i32 %tmp7, i32* %tmp2, align 4 -  %tmp8 = icmp eq i32 %tmp7, 0                    ; [#uses=1 type=i1] -  %tmp9 = load volatile i32, i32* %tmp                          ; [#uses=1 type=i32] -  %tmp10 = add i32 %tmp9, -1              ; [#uses=1 type=i32] -  store volatile i32 %tmp10, i32* %tmp3 -  br i1 %tmp8, label %bb11, label %bb4 - -bb11:                                             ; preds = %bb4 -  %tmp12 = load volatile i32, i32* %tmp, align 4                ; [#uses=1 type=i32] -  ret i32 %tmp12 -} - - diff --git a/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll index 50352e2abe9..2060ac65446 100644 --- a/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll +++ b/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll @@ -1,15 +1,12 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA -; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA +; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s 
--check-prefixes=32-ALL,32-GOOD-RA +; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA -; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA -; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF -; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF - -; TODO: Reenable verify-machineinstr once the if (!AXDead) // FIXME -; in X86InstrInfo::copyPhysReg() is resolved. +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF +; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF  declare i32 @foo()  declare i32 @bar(i64) @@ -22,37 +19,29 @@ declare i32 @bar(i64)  ; ...  ; use of eax  ; During PEI the adjcallstackdown32 is replaced with the subl which -; clobbers eflags, effectively interfering in the liveness interval. -; Is this a case we care about? Maybe no, considering this issue -; happens with the fast pre-regalloc scheduler enforced. A more -; performant scheduler would move the adjcallstackdown32 out of the -; eflags liveness interval. +; clobbers eflags, effectively interfering in the liveness interval. However, +; we then promote these copies into independent conditions in GPRs that avoids +; repeated saving and restoring logic and can be trivially managed by the +; register allocator.  
define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {  ; 32-GOOD-RA-LABEL: test_intervening_call:  ; 32-GOOD-RA:       # %bb.0: # %entry -; 32-GOOD-RA-NEXT:    pushl %ebp -; 32-GOOD-RA-NEXT:    movl %esp, %ebp  ; 32-GOOD-RA-NEXT:    pushl %ebx  ; 32-GOOD-RA-NEXT:    pushl %esi -; 32-GOOD-RA-NEXT:    movl 12(%ebp), %eax -; 32-GOOD-RA-NEXT:    movl 16(%ebp), %edx -; 32-GOOD-RA-NEXT:    movl 20(%ebp), %ebx -; 32-GOOD-RA-NEXT:    movl 24(%ebp), %ecx -; 32-GOOD-RA-NEXT:    movl 8(%ebp), %esi -; 32-GOOD-RA-NEXT:    lock cmpxchg8b (%esi)  ; 32-GOOD-RA-NEXT:    pushl %eax -; 32-GOOD-RA-NEXT:    seto %al -; 32-GOOD-RA-NEXT:    lahf -; 32-GOOD-RA-NEXT:    movl %eax, %esi -; 32-GOOD-RA-NEXT:    popl %eax +; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax +; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %edx +; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ebx +; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi +; 32-GOOD-RA-NEXT:    lock cmpxchg8b (%esi) +; 32-GOOD-RA-NEXT:    setne %bl  ; 32-GOOD-RA-NEXT:    subl $8, %esp  ; 32-GOOD-RA-NEXT:    pushl %edx  ; 32-GOOD-RA-NEXT:    pushl %eax  ; 32-GOOD-RA-NEXT:    calll bar  ; 32-GOOD-RA-NEXT:    addl $16, %esp -; 32-GOOD-RA-NEXT:    movl %esi, %eax -; 32-GOOD-RA-NEXT:    addb $127, %al -; 32-GOOD-RA-NEXT:    sahf +; 32-GOOD-RA-NEXT:    testb $-1, %bl  ; 32-GOOD-RA-NEXT:    jne .LBB0_3  ; 32-GOOD-RA-NEXT:  # %bb.1: # %t  ; 32-GOOD-RA-NEXT:    movl $42, %eax @@ -61,46 +50,29 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {  ; 32-GOOD-RA-NEXT:    xorl %eax, %eax  ; 32-GOOD-RA-NEXT:  .LBB0_2: # %t  ; 32-GOOD-RA-NEXT:    xorl %edx, %edx +; 32-GOOD-RA-NEXT:    addl $4, %esp  ; 32-GOOD-RA-NEXT:    popl %esi  ; 32-GOOD-RA-NEXT:    popl %ebx -; 32-GOOD-RA-NEXT:    popl %ebp  ; 32-GOOD-RA-NEXT:    retl  ;  ; 32-FAST-RA-LABEL: test_intervening_call:  ; 32-FAST-RA:       # %bb.0: # %entry -; 32-FAST-RA-NEXT:    pushl %ebp -; 32-FAST-RA-NEXT:    movl %esp, %ebp  ; 32-FAST-RA-NEXT:    pushl %ebx  ; 32-FAST-RA-NEXT:    pushl %esi -; 32-FAST-RA-NEXT:    movl 8(%ebp), %esi -; 32-FAST-RA-NEXT:    movl 20(%ebp), %ebx -; 32-FAST-RA-NEXT:    movl 24(%ebp), %ecx -; 32-FAST-RA-NEXT:    movl 12(%ebp), %eax -; 32-FAST-RA-NEXT:    movl 16(%ebp), %edx -; 32-FAST-RA-NEXT:    lock cmpxchg8b (%esi)  ; 32-FAST-RA-NEXT:    pushl %eax -; 32-FAST-RA-NEXT:    seto %al -; 32-FAST-RA-NEXT:    lahf -; 32-FAST-RA-NEXT:    movl %eax, %ecx -; 32-FAST-RA-NEXT:    popl %eax +; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi +; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ebx +; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax +; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %edx +; 32-FAST-RA-NEXT:    lock cmpxchg8b (%esi) +; 32-FAST-RA-NEXT:    setne %bl  ; 32-FAST-RA-NEXT:    subl $8, %esp -; 32-FAST-RA-NEXT:    pushl %eax -; 32-FAST-RA-NEXT:    movl %ecx, %eax -; 32-FAST-RA-NEXT:    addb $127, %al -; 32-FAST-RA-NEXT:    sahf -; 32-FAST-RA-NEXT:    popl %eax -; 32-FAST-RA-NEXT:    pushl %eax -; 32-FAST-RA-NEXT:    seto %al -; 32-FAST-RA-NEXT:    lahf -; 32-FAST-RA-NEXT:    movl %eax, %esi -; 32-FAST-RA-NEXT:    popl %eax  ; 32-FAST-RA-NEXT:    pushl %edx  ; 32-FAST-RA-NEXT:    pushl %eax  ; 32-FAST-RA-NEXT:    calll bar  ; 32-FAST-RA-NEXT:    addl $16, %esp -; 32-FAST-RA-NEXT:    movl %esi, %eax -; 32-FAST-RA-NEXT:    addb $127, %al -; 32-FAST-RA-NEXT:    sahf +; 32-FAST-RA-NEXT:    testb $-1, %bl  ; 32-FAST-RA-NEXT:    jne .LBB0_3  ; 32-FAST-RA-NEXT: 
 # %bb.1: # %t  ; 32-FAST-RA-NEXT:    movl $42, %eax @@ -109,122 +81,29 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {  ; 32-FAST-RA-NEXT:    xorl %eax, %eax  ; 32-FAST-RA-NEXT:  .LBB0_2: # %t  ; 32-FAST-RA-NEXT:    xorl %edx, %edx +; 32-FAST-RA-NEXT:    addl $4, %esp  ; 32-FAST-RA-NEXT:    popl %esi  ; 32-FAST-RA-NEXT:    popl %ebx -; 32-FAST-RA-NEXT:    popl %ebp  ; 32-FAST-RA-NEXT:    retl  ; -; 64-GOOD-RA-LABEL: test_intervening_call: -; 64-GOOD-RA:       # %bb.0: # %entry -; 64-GOOD-RA-NEXT:    pushq %rbp -; 64-GOOD-RA-NEXT:    movq %rsp, %rbp -; 64-GOOD-RA-NEXT:    pushq %rbx -; 64-GOOD-RA-NEXT:    pushq %rax -; 64-GOOD-RA-NEXT:    movq %rsi, %rax -; 64-GOOD-RA-NEXT:    lock cmpxchgq %rdx, (%rdi) -; 64-GOOD-RA-NEXT:    pushfq -; 64-GOOD-RA-NEXT:    popq %rbx -; 64-GOOD-RA-NEXT:    movq %rax, %rdi -; 64-GOOD-RA-NEXT:    callq bar -; 64-GOOD-RA-NEXT:    pushq %rbx -; 64-GOOD-RA-NEXT:    popfq -; 64-GOOD-RA-NEXT:    jne .LBB0_3 -; 64-GOOD-RA-NEXT:  # %bb.1: # %t -; 64-GOOD-RA-NEXT:    movl $42, %eax -; 64-GOOD-RA-NEXT:    jmp .LBB0_2 -; 64-GOOD-RA-NEXT:  .LBB0_3: # %f -; 64-GOOD-RA-NEXT:    xorl %eax, %eax -; 64-GOOD-RA-NEXT:  .LBB0_2: # %t -; 64-GOOD-RA-NEXT:    addq $8, %rsp -; 64-GOOD-RA-NEXT:    popq %rbx -; 64-GOOD-RA-NEXT:    popq %rbp -; 64-GOOD-RA-NEXT:    retq -; -; 64-FAST-RA-LABEL: test_intervening_call: -; 64-FAST-RA:       # %bb.0: # %entry -; 64-FAST-RA-NEXT:    pushq %rbp -; 64-FAST-RA-NEXT:    movq %rsp, %rbp -; 64-FAST-RA-NEXT:    pushq %rbx -; 64-FAST-RA-NEXT:    pushq %rax -; 64-FAST-RA-NEXT:    movq %rsi, %rax -; 64-FAST-RA-NEXT:    lock cmpxchgq %rdx, (%rdi) -; 64-FAST-RA-NEXT:    pushfq -; 64-FAST-RA-NEXT:    popq %rbx -; 64-FAST-RA-NEXT:    movq %rax, %rdi -; 64-FAST-RA-NEXT:    callq bar -; 64-FAST-RA-NEXT:    pushq %rbx -; 64-FAST-RA-NEXT:    popfq -; 64-FAST-RA-NEXT:    jne .LBB0_3 -; 64-FAST-RA-NEXT:  # %bb.1: # %t -; 64-FAST-RA-NEXT:    movl $42, %eax -; 64-FAST-RA-NEXT:    jmp .LBB0_2 -; 64-FAST-RA-NEXT:  .LBB0_3: # %f -; 64-FAST-RA-NEXT:    xorl %eax, %eax -; 64-FAST-RA-NEXT:  .LBB0_2: # %t -; 64-FAST-RA-NEXT:    addq $8, %rsp -; 64-FAST-RA-NEXT:    popq %rbx -; 64-FAST-RA-NEXT:    popq %rbp -; 64-FAST-RA-NEXT:    retq -; -; 64-GOOD-RA-SAHF-LABEL: test_intervening_call: -; 64-GOOD-RA-SAHF:       # %bb.0: # %entry -; 64-GOOD-RA-SAHF-NEXT:    pushq %rbp -; 64-GOOD-RA-SAHF-NEXT:    movq %rsp, %rbp -; 64-GOOD-RA-SAHF-NEXT:    pushq %rbx -; 64-GOOD-RA-SAHF-NEXT:    pushq %rax -; 64-GOOD-RA-SAHF-NEXT:    movq %rsi, %rax -; 64-GOOD-RA-SAHF-NEXT:    lock cmpxchgq %rdx, (%rdi) -; 64-GOOD-RA-SAHF-NEXT:    pushq %rax -; 64-GOOD-RA-SAHF-NEXT:    seto %al -; 64-GOOD-RA-SAHF-NEXT:    lahf -; 64-GOOD-RA-SAHF-NEXT:    movq %rax, %rbx -; 64-GOOD-RA-SAHF-NEXT:    popq %rax -; 64-GOOD-RA-SAHF-NEXT:    movq %rax, %rdi -; 64-GOOD-RA-SAHF-NEXT:    callq bar -; 64-GOOD-RA-SAHF-NEXT:    movq %rbx, %rax -; 64-GOOD-RA-SAHF-NEXT:    addb $127, %al -; 64-GOOD-RA-SAHF-NEXT:    sahf -; 64-GOOD-RA-SAHF-NEXT:    jne .LBB0_3 -; 64-GOOD-RA-SAHF-NEXT:  # %bb.1: # %t -; 64-GOOD-RA-SAHF-NEXT:    movl $42, %eax -; 64-GOOD-RA-SAHF-NEXT:    jmp .LBB0_2 -; 64-GOOD-RA-SAHF-NEXT:  .LBB0_3: # %f -; 64-GOOD-RA-SAHF-NEXT:    xorl %eax, %eax -; 64-GOOD-RA-SAHF-NEXT:  .LBB0_2: # %t -; 64-GOOD-RA-SAHF-NEXT:    addq $8, %rsp -; 64-GOOD-RA-SAHF-NEXT:    popq %rbx -; 64-GOOD-RA-SAHF-NEXT:    popq %rbp -; 64-GOOD-RA-SAHF-NEXT:    retq -; -; 64-FAST-RA-SAHF-LABEL: test_intervening_call: -; 64-FAST-RA-SAHF:       # %bb.0: # %entry -; 64-FAST-RA-SAHF-NEXT:    pushq %rbp -; 
64-FAST-RA-SAHF-NEXT:    movq %rsp, %rbp -; 64-FAST-RA-SAHF-NEXT:    pushq %rbx -; 64-FAST-RA-SAHF-NEXT:    pushq %rax -; 64-FAST-RA-SAHF-NEXT:    movq %rsi, %rax -; 64-FAST-RA-SAHF-NEXT:    lock cmpxchgq %rdx, (%rdi) -; 64-FAST-RA-SAHF-NEXT:    pushq %rax -; 64-FAST-RA-SAHF-NEXT:    seto %al -; 64-FAST-RA-SAHF-NEXT:    lahf -; 64-FAST-RA-SAHF-NEXT:    movq %rax, %rbx -; 64-FAST-RA-SAHF-NEXT:    popq %rax -; 64-FAST-RA-SAHF-NEXT:    movq %rax, %rdi -; 64-FAST-RA-SAHF-NEXT:    callq bar -; 64-FAST-RA-SAHF-NEXT:    movq %rbx, %rax -; 64-FAST-RA-SAHF-NEXT:    addb $127, %al -; 64-FAST-RA-SAHF-NEXT:    sahf -; 64-FAST-RA-SAHF-NEXT:    jne .LBB0_3 -; 64-FAST-RA-SAHF-NEXT:  # %bb.1: # %t -; 64-FAST-RA-SAHF-NEXT:    movl $42, %eax -; 64-FAST-RA-SAHF-NEXT:    jmp .LBB0_2 -; 64-FAST-RA-SAHF-NEXT:  .LBB0_3: # %f -; 64-FAST-RA-SAHF-NEXT:    xorl %eax, %eax -; 64-FAST-RA-SAHF-NEXT:  .LBB0_2: # %t -; 64-FAST-RA-SAHF-NEXT:    addq $8, %rsp -; 64-FAST-RA-SAHF-NEXT:    popq %rbx -; 64-FAST-RA-SAHF-NEXT:    popq %rbp -; 64-FAST-RA-SAHF-NEXT:    retq +; 64-ALL-LABEL: test_intervening_call: +; 64-ALL:       # %bb.0: # %entry +; 64-ALL-NEXT:    pushq %rbx +; 64-ALL-NEXT:    movq %rsi, %rax +; 64-ALL-NEXT:    lock cmpxchgq %rdx, (%rdi) +; 64-ALL-NEXT:    setne %bl +; 64-ALL-NEXT:    movq %rax, %rdi +; 64-ALL-NEXT:    callq bar +; 64-ALL-NEXT:    testb $-1, %bl +; 64-ALL-NEXT:    jne .LBB0_2 +; 64-ALL-NEXT:  # %bb.1: # %t +; 64-ALL-NEXT:    movl $42, %eax +; 64-ALL-NEXT:    popq %rbx +; 64-ALL-NEXT:    retq +; 64-ALL-NEXT:  .LBB0_2: # %f +; 64-ALL-NEXT:    xorl %eax, %eax +; 64-ALL-NEXT:    popq %rbx +; 64-ALL-NEXT:    retq  entry:    %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst    %v = extractvalue { i64, i1 } %cx, 0 @@ -331,149 +210,64 @@ cond.end:  define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {  ; 32-GOOD-RA-LABEL: test_feed_cmov:  ; 32-GOOD-RA:       # %bb.0: # %entry -; 32-GOOD-RA-NEXT:    pushl %ebp -; 32-GOOD-RA-NEXT:    movl %esp, %ebp -; 32-GOOD-RA-NEXT:    pushl %edi +; 32-GOOD-RA-NEXT:    pushl %ebx  ; 32-GOOD-RA-NEXT:    pushl %esi -; 32-GOOD-RA-NEXT:    movl 12(%ebp), %eax -; 32-GOOD-RA-NEXT:    movl 16(%ebp), %esi -; 32-GOOD-RA-NEXT:    movl 8(%ebp), %ecx +; 32-GOOD-RA-NEXT:    pushl %eax +; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax +; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi +; 32-GOOD-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx  ; 32-GOOD-RA-NEXT:    lock cmpxchgl %esi, (%ecx) -; 32-GOOD-RA-NEXT:    seto %al -; 32-GOOD-RA-NEXT:    lahf -; 32-GOOD-RA-NEXT:    movl %eax, %edi +; 32-GOOD-RA-NEXT:    sete %bl  ; 32-GOOD-RA-NEXT:    calll foo -; 32-GOOD-RA-NEXT:    pushl %eax -; 32-GOOD-RA-NEXT:    movl %edi, %eax -; 32-GOOD-RA-NEXT:    addb $127, %al -; 32-GOOD-RA-NEXT:    sahf -; 32-GOOD-RA-NEXT:    popl %eax -; 32-GOOD-RA-NEXT:    je .LBB2_2 +; 32-GOOD-RA-NEXT:    testb $-1, %bl +; 32-GOOD-RA-NEXT:    jne .LBB2_2  ; 32-GOOD-RA-NEXT:  # %bb.1: # %entry  ; 32-GOOD-RA-NEXT:    movl %eax, %esi  ; 32-GOOD-RA-NEXT:  .LBB2_2: # %entry  ; 32-GOOD-RA-NEXT:    movl %esi, %eax +; 32-GOOD-RA-NEXT:    addl $4, %esp  ; 32-GOOD-RA-NEXT:    popl %esi -; 32-GOOD-RA-NEXT:    popl %edi -; 32-GOOD-RA-NEXT:    popl %ebp +; 32-GOOD-RA-NEXT:    popl %ebx  ; 32-GOOD-RA-NEXT:    retl  ;  ; 32-FAST-RA-LABEL: test_feed_cmov:  ; 32-FAST-RA:       # %bb.0: # %entry -; 32-FAST-RA-NEXT:    pushl %ebp -; 32-FAST-RA-NEXT:    movl %esp, %ebp -; 32-FAST-RA-NEXT:    pushl %edi +; 32-FAST-RA-NEXT:    pushl %ebx  ; 32-FAST-RA-NEXT:    pushl %esi -; 32-FAST-RA-NEXT:    movl 8(%ebp), 
%ecx -; 32-FAST-RA-NEXT:    movl 16(%ebp), %esi -; 32-FAST-RA-NEXT:    movl 12(%ebp), %eax +; 32-FAST-RA-NEXT:    pushl %eax +; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %esi +; 32-FAST-RA-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; 32-FAST-RA-NEXT:    lock cmpxchgl %esi, (%ecx) -; 32-FAST-RA-NEXT:    seto %al -; 32-FAST-RA-NEXT:    lahf -; 32-FAST-RA-NEXT:    movl %eax, %edi +; 32-FAST-RA-NEXT:    sete %bl  ; 32-FAST-RA-NEXT:    calll foo -; 32-FAST-RA-NEXT:    pushl %eax -; 32-FAST-RA-NEXT:    movl %edi, %eax -; 32-FAST-RA-NEXT:    addb $127, %al -; 32-FAST-RA-NEXT:    sahf -; 32-FAST-RA-NEXT:    popl %eax -; 32-FAST-RA-NEXT:    je .LBB2_2 +; 32-FAST-RA-NEXT:    testb $-1, %bl +; 32-FAST-RA-NEXT:    jne .LBB2_2  ; 32-FAST-RA-NEXT:  # %bb.1: # %entry  ; 32-FAST-RA-NEXT:    movl %eax, %esi  ; 32-FAST-RA-NEXT:  .LBB2_2: # %entry  ; 32-FAST-RA-NEXT:    movl %esi, %eax +; 32-FAST-RA-NEXT:    addl $4, %esp  ; 32-FAST-RA-NEXT:    popl %esi -; 32-FAST-RA-NEXT:    popl %edi -; 32-FAST-RA-NEXT:    popl %ebp +; 32-FAST-RA-NEXT:    popl %ebx  ; 32-FAST-RA-NEXT:    retl  ; -; 64-GOOD-RA-LABEL: test_feed_cmov: -; 64-GOOD-RA:       # %bb.0: # %entry -; 64-GOOD-RA-NEXT:    pushq %rbp -; 64-GOOD-RA-NEXT:    movq %rsp, %rbp -; 64-GOOD-RA-NEXT:    pushq %r14 -; 64-GOOD-RA-NEXT:    pushq %rbx -; 64-GOOD-RA-NEXT:    movl %edx, %ebx -; 64-GOOD-RA-NEXT:    movl %esi, %eax -; 64-GOOD-RA-NEXT:    lock cmpxchgl %edx, (%rdi) -; 64-GOOD-RA-NEXT:    pushfq -; 64-GOOD-RA-NEXT:    popq %r14 -; 64-GOOD-RA-NEXT:    callq foo -; 64-GOOD-RA-NEXT:    pushq %r14 -; 64-GOOD-RA-NEXT:    popfq -; 64-GOOD-RA-NEXT:    cmovel %ebx, %eax -; 64-GOOD-RA-NEXT:    popq %rbx -; 64-GOOD-RA-NEXT:    popq %r14 -; 64-GOOD-RA-NEXT:    popq %rbp -; 64-GOOD-RA-NEXT:    retq -; -; 64-FAST-RA-LABEL: test_feed_cmov: -; 64-FAST-RA:       # %bb.0: # %entry -; 64-FAST-RA-NEXT:    pushq %rbp -; 64-FAST-RA-NEXT:    movq %rsp, %rbp -; 64-FAST-RA-NEXT:    pushq %r14 -; 64-FAST-RA-NEXT:    pushq %rbx -; 64-FAST-RA-NEXT:    movl %edx, %ebx -; 64-FAST-RA-NEXT:    movl %esi, %eax -; 64-FAST-RA-NEXT:    lock cmpxchgl %edx, (%rdi) -; 64-FAST-RA-NEXT:    pushfq -; 64-FAST-RA-NEXT:    popq %r14 -; 64-FAST-RA-NEXT:    callq foo -; 64-FAST-RA-NEXT:    pushq %r14 -; 64-FAST-RA-NEXT:    popfq -; 64-FAST-RA-NEXT:    cmovel %ebx, %eax -; 64-FAST-RA-NEXT:    popq %rbx -; 64-FAST-RA-NEXT:    popq %r14 -; 64-FAST-RA-NEXT:    popq %rbp -; 64-FAST-RA-NEXT:    retq -; -; 64-GOOD-RA-SAHF-LABEL: test_feed_cmov: -; 64-GOOD-RA-SAHF:       # %bb.0: # %entry -; 64-GOOD-RA-SAHF-NEXT:    pushq %rbp -; 64-GOOD-RA-SAHF-NEXT:    movq %rsp, %rbp -; 64-GOOD-RA-SAHF-NEXT:    pushq %r14 -; 64-GOOD-RA-SAHF-NEXT:    pushq %rbx -; 64-GOOD-RA-SAHF-NEXT:    movl %edx, %ebx -; 64-GOOD-RA-SAHF-NEXT:    movl %esi, %eax -; 64-GOOD-RA-SAHF-NEXT:    lock cmpxchgl %edx, (%rdi) -; 64-GOOD-RA-SAHF-NEXT:    seto %al -; 64-GOOD-RA-SAHF-NEXT:    lahf -; 64-GOOD-RA-SAHF-NEXT:    movq %rax, %r14 -; 64-GOOD-RA-SAHF-NEXT:    callq foo -; 64-GOOD-RA-SAHF-NEXT:    pushq %rax -; 64-GOOD-RA-SAHF-NEXT:    movq %r14, %rax -; 64-GOOD-RA-SAHF-NEXT:    addb $127, %al -; 64-GOOD-RA-SAHF-NEXT:    sahf -; 64-GOOD-RA-SAHF-NEXT:    popq %rax -; 64-GOOD-RA-SAHF-NEXT:    cmovel %ebx, %eax -; 64-GOOD-RA-SAHF-NEXT:    popq %rbx -; 64-GOOD-RA-SAHF-NEXT:    popq %r14 -; 64-GOOD-RA-SAHF-NEXT:    popq %rbp -; 64-GOOD-RA-SAHF-NEXT:    retq -; -; 64-FAST-RA-SAHF-LABEL: test_feed_cmov: -; 64-FAST-RA-SAHF:       # %bb.0: # %entry -; 64-FAST-RA-SAHF-NEXT:    pushq %rbp -; 
64-FAST-RA-SAHF-NEXT:    movq %rsp, %rbp -; 64-FAST-RA-SAHF-NEXT:    pushq %r14 -; 64-FAST-RA-SAHF-NEXT:    pushq %rbx -; 64-FAST-RA-SAHF-NEXT:    movl %edx, %ebx -; 64-FAST-RA-SAHF-NEXT:    movl %esi, %eax -; 64-FAST-RA-SAHF-NEXT:    lock cmpxchgl %edx, (%rdi) -; 64-FAST-RA-SAHF-NEXT:    seto %al -; 64-FAST-RA-SAHF-NEXT:    lahf -; 64-FAST-RA-SAHF-NEXT:    movq %rax, %r14 -; 64-FAST-RA-SAHF-NEXT:    callq foo -; 64-FAST-RA-SAHF-NEXT:    pushq %rax -; 64-FAST-RA-SAHF-NEXT:    movq %r14, %rax -; 64-FAST-RA-SAHF-NEXT:    addb $127, %al -; 64-FAST-RA-SAHF-NEXT:    sahf -; 64-FAST-RA-SAHF-NEXT:    popq %rax -; 64-FAST-RA-SAHF-NEXT:    cmovel %ebx, %eax -; 64-FAST-RA-SAHF-NEXT:    popq %rbx -; 64-FAST-RA-SAHF-NEXT:    popq %r14 -; 64-FAST-RA-SAHF-NEXT:    popq %rbp -; 64-FAST-RA-SAHF-NEXT:    retq +; 64-ALL-LABEL: test_feed_cmov: +; 64-ALL:       # %bb.0: # %entry +; 64-ALL-NEXT:    pushq %rbp +; 64-ALL-NEXT:    pushq %rbx +; 64-ALL-NEXT:    pushq %rax +; 64-ALL-NEXT:    movl %edx, %ebx +; 64-ALL-NEXT:    movl %esi, %eax +; 64-ALL-NEXT:    lock cmpxchgl %edx, (%rdi) +; 64-ALL-NEXT:    sete %bpl +; 64-ALL-NEXT:    callq foo +; 64-ALL-NEXT:    testb $-1, %bpl +; 64-ALL-NEXT:    cmovnel %ebx, %eax +; 64-ALL-NEXT:    addq $8, %rsp +; 64-ALL-NEXT:    popq %rbx +; 64-ALL-NEXT:    popq %rbp +; 64-ALL-NEXT:    retq  entry:    %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst    %success = extractvalue { i32, i1 } %res, 1 diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll index 1e4fca5d10a..07d13fe2e09 100644 --- a/llvm/test/CodeGen/X86/copy-eflags.ll +++ b/llvm/test/CodeGen/X86/copy-eflags.ll @@ -19,35 +19,24 @@ define i32 @test1() nounwind {  ; X32-LABEL: test1:  ; X32:       # %bb.0: # %entry  ; X32-NEXT:    movb b, %cl -; X32-NEXT:    movb %cl, %al +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    incb %al  ; X32-NEXT:    movb %al, b  ; X32-NEXT:    incl c -; X32-NEXT:    pushl %eax -; X32-NEXT:    seto %al -; X32-NEXT:    lahf -; X32-NEXT:    movl %eax, %edx -; X32-NEXT:    popl %eax +; X32-NEXT:    sete %dl  ; X32-NEXT:    movb a, %ah  ; X32-NEXT:    movb %ah, %ch  ; X32-NEXT:    incb %ch  ; X32-NEXT:    cmpb %cl, %ah  ; X32-NEXT:    sete d  ; X32-NEXT:    movb %ch, a -; X32-NEXT:    pushl %eax -; X32-NEXT:    movl %edx, %eax -; X32-NEXT:    addb $127, %al -; X32-NEXT:    sahf -; X32-NEXT:    popl %eax -; X32-NEXT:    je .LBB0_2 +; X32-NEXT:    testb $-1, %dl +; X32-NEXT:    jne .LBB0_2  ; X32-NEXT:  # %bb.1: # %if.then -; X32-NEXT:    pushl %ebp -; X32-NEXT:    movl %esp, %ebp  ; X32-NEXT:    movsbl %al, %eax  ; X32-NEXT:    pushl %eax  ; X32-NEXT:    calll external  ; X32-NEXT:    addl $4, %esp -; X32-NEXT:    popl %ebp  ; X32-NEXT:  .LBB0_2: # %if.end  ; X32-NEXT:    xorl %eax, %eax  ; X32-NEXT:    retl @@ -59,23 +48,20 @@ define i32 @test1() nounwind {  ; X64-NEXT:    incb %al  ; X64-NEXT:    movb %al, {{.*}}(%rip)  ; X64-NEXT:    incl {{.*}}(%rip) -; X64-NEXT:    pushfq -; X64-NEXT:    popq %rsi +; X64-NEXT:    sete %sil  ; X64-NEXT:    movb {{.*}}(%rip), %cl  ; X64-NEXT:    movl %ecx, %edx  ; X64-NEXT:    incb %dl  ; X64-NEXT:    cmpb %dil, %cl  ; X64-NEXT:    sete {{.*}}(%rip)  ; X64-NEXT:    movb %dl, {{.*}}(%rip) -; X64-NEXT:    pushq %rsi -; X64-NEXT:    popfq -; X64-NEXT:    je .LBB0_2 +; X64-NEXT:    testb $-1, %sil +; X64-NEXT:    jne .LBB0_2  ; X64-NEXT:  # %bb.1: # %if.then -; X64-NEXT:    pushq %rbp -; X64-NEXT:    movq %rsp, %rbp +; X64-NEXT:    pushq %rax  ; X64-NEXT:    movsbl %al, %edi  ; X64-NEXT:    callq external -; 
X64-NEXT:    popq %rbp +; X64-NEXT:    addq $8, %rsp  ; X64-NEXT:  .LBB0_2: # %if.end  ; X64-NEXT:    xorl %eax, %eax  ; X64-NEXT:    retq @@ -108,54 +94,40 @@ if.end:  define i32 @test2(i32* %ptr) nounwind {  ; X32-LABEL: test2:  ; X32:       # %bb.0: # %entry -; X32-NEXT:    pushl %ebp -; X32-NEXT:    movl %esp, %ebp -; X32-NEXT:    pushl %esi -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    pushl %ebx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    incl (%eax) -; X32-NEXT:    seto %al -; X32-NEXT:    lahf -; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    setne %bl  ; X32-NEXT:    pushl $42  ; X32-NEXT:    calll external  ; X32-NEXT:    addl $4, %esp -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    addb $127, %al -; X32-NEXT:    sahf +; X32-NEXT:    testb $-1, %bl  ; X32-NEXT:    je .LBB1_1 -; X32-NEXT:  # %bb.3: # %else +; X32-NEXT:  # %bb.2: # %else  ; X32-NEXT:    xorl %eax, %eax -; X32-NEXT:    jmp .LBB1_2 +; X32-NEXT:    popl %ebx +; X32-NEXT:    retl  ; X32-NEXT:  .LBB1_1: # %then  ; X32-NEXT:    movl $64, %eax -; X32-NEXT:  .LBB1_2: # %then -; X32-NEXT:    popl %esi -; X32-NEXT:    popl %ebp +; X32-NEXT:    popl %ebx  ; X32-NEXT:    retl  ;  ; X64-LABEL: test2:  ; X64:       # %bb.0: # %entry -; X64-NEXT:    pushq %rbp -; X64-NEXT:    movq %rsp, %rbp  ; X64-NEXT:    pushq %rbx -; X64-NEXT:    pushq %rax  ; X64-NEXT:    incl (%rdi) -; X64-NEXT:    pushfq -; X64-NEXT:    popq %rbx +; X64-NEXT:    setne %bl  ; X64-NEXT:    movl $42, %edi  ; X64-NEXT:    callq external -; X64-NEXT:    pushq %rbx -; X64-NEXT:    popfq +; X64-NEXT:    testb $-1, %bl  ; X64-NEXT:    je .LBB1_1 -; X64-NEXT:  # %bb.3: # %else +; X64-NEXT:  # %bb.2: # %else  ; X64-NEXT:    xorl %eax, %eax -; X64-NEXT:    jmp .LBB1_2 +; X64-NEXT:    popq %rbx +; X64-NEXT:    retq  ; X64-NEXT:  .LBB1_1: # %then  ; X64-NEXT:    movl $64, %eax -; X64-NEXT:  .LBB1_2: # %then -; X64-NEXT:    addq $8, %rsp  ; X64-NEXT:    popq %rbx -; X64-NEXT:    popq %rbp  ; X64-NEXT:    retq  entry:    %val = load i32, i32* %ptr @@ -183,43 +155,25 @@ declare void @external_b()  define void @test_tail_call(i32* %ptr) nounwind optsize {  ; X32-LABEL: test_tail_call:  ; X32:       # %bb.0: # %entry -; X32-NEXT:    pushl %ebp -; X32-NEXT:    movl %esp, %ebp -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    incl (%eax) -; X32-NEXT:    seto %al -; X32-NEXT:    lahf -; X32-NEXT:    movl %eax, %eax +; X32-NEXT:    setne %al  ; X32-NEXT:    incb a  ; X32-NEXT:    sete d -; X32-NEXT:    movl %eax, %eax -; X32-NEXT:    addb $127, %al -; X32-NEXT:    sahf -; X32-NEXT:    je .LBB2_1 -; X32-NEXT:  # %bb.2: # %else -; X32-NEXT:    popl %ebp -; X32-NEXT:    jmp external_b # TAILCALL -; X32-NEXT:  .LBB2_1: # %then -; X32-NEXT:    popl %ebp +; X32-NEXT:    testb $-1, %al +; X32-NEXT:    jne external_b # TAILCALL +; X32-NEXT:  # %bb.1: # %then  ; X32-NEXT:    jmp external_a # TAILCALL  ;  ; X64-LABEL: test_tail_call:  ; X64:       # %bb.0: # %entry -; X64-NEXT:    pushq %rbp -; X64-NEXT:    movq %rsp, %rbp  ; X64-NEXT:    incl (%rdi) -; X64-NEXT:    pushfq -; X64-NEXT:    popq %rax +; X64-NEXT:    setne %al  ; X64-NEXT:    incb {{.*}}(%rip)  ; X64-NEXT:    sete {{.*}}(%rip) -; X64-NEXT:    pushq %rax -; X64-NEXT:    popfq -; X64-NEXT:    je .LBB2_1 -; X64-NEXT:  # %bb.2: # %else -; X64-NEXT:    popq %rbp -; X64-NEXT:    jmp external_b # TAILCALL -; X64-NEXT:  .LBB2_1: # %then -; X64-NEXT:    popq %rbp +; X64-NEXT:    testb $-1, %al +; X64-NEXT:    jne external_b # TAILCALL +; X64-NEXT:  # %bb.1: # %then  ; 
X64-NEXT:    jmp external_a # TAILCALL  entry:    %val = load i32, i32* %ptr diff --git a/llvm/test/CodeGen/X86/eflags-copy-expansion.mir b/llvm/test/CodeGen/X86/eflags-copy-expansion.mir deleted file mode 100644 index 385e3d9a67d..00000000000 --- a/llvm/test/CodeGen/X86/eflags-copy-expansion.mir +++ /dev/null @@ -1,64 +0,0 @@ -# RUN: llc -run-pass postrapseudos -mtriple=i386-apple-macosx -o - %s | FileCheck %s - -# Verify that we correctly save and restore eax when copying eflags, -# even when only a smaller alias of eax is used. We used to check only -# eax and not its aliases. -# PR27624. - ---- | -  target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" - -  define void @foo() { -  entry: -    br label %false -  false: -    ret void -  } - -... - ---- -name:            foo -tracksRegLiveness: true -liveins: -  - { reg: '$edi' } -body:             | -  bb.0.entry: -    liveins: $edi -    NOOP implicit-def $al - -    ; The bug was triggered only when LivePhysReg is used, which -    ; happens only when the heuristic for the liveness computation -    ; failed. The liveness computation heuristic looks at 10 instructions -    ; before and after the copy. Make sure we do not reach the definition of -    ; AL in 10 instructions, otherwise the heuristic will see that it is live. -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    NOOP -    ; Save AL. -    ; CHECK: PUSH32r killed $eax - -    ; Copy edi into EFLAGS -    ; CHECK-NEXT: $eax = MOV32rr $edi -    ; CHECK-NEXT: $al = ADD8ri $al, 127, implicit-def $eflags -    ; CHECK-NEXT: SAHF implicit-def $eflags, implicit $ah -    $eflags = COPY $edi - -    ; Restore AL. -    ; CHECK-NEXT: $eax = POP32r -  bb.1.false: -    liveins: $al -    NOOP implicit $al -    RETQ - -... diff --git a/llvm/test/CodeGen/X86/flags-copy-lowering.mir b/llvm/test/CodeGen/X86/flags-copy-lowering.mir new file mode 100644 index 00000000000..fd263c0d1d4 --- /dev/null +++ b/llvm/test/CodeGen/X86/flags-copy-lowering.mir @@ -0,0 +1,485 @@ +# RUN: llc -run-pass x86-flags-copy-lowering -verify-machineinstrs -o - %s | FileCheck %s +# +# Lower various interesting copy patterns of EFLAGS without using LAHF/SAHF. + +--- | +  target triple = "x86_64-unknown-unknown" +   +  declare void @foo() +   +  define i32 @test_branch(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret i32 0 +  } + +  define i32 @test_branch_fallthrough(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret i32 0 +  } + +  define void @test_setcc(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret void +  } + +  define void @test_cmov(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret void +  } + +  define void @test_adc(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret void +  } + +  define void @test_sbb(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret void +  } + +  define void @test_adcx(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret void +  } + +  define void @test_adox(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret void +  } + +  define void @test_rcl(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret void +  } + +  define void @test_rcr(i64 %a, i64 %b) { +  entry: +    call void @foo() +    ret void +  } +... 
+--- +name:            test_branch +# CHECK-LABEL: name: test_branch +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    successors: %bb.1, %bb.2, %bb.3 +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    CMP64rr %0, %1, implicit-def $eflags +    %2:gr64 = COPY $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags +  ; CHECK:      %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags +  ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %2 +    JA_1 %bb.1, implicit $eflags +    JB_1 %bb.2, implicit $eflags +    JMP_1 %bb.3 +  ; CHECK-NOT: $eflags = +  ; +  ; CHECK:        TEST8ri %[[A_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:   JNE_1 %bb.1, implicit killed $eflags +  ; CHECK-SAME: {{$[[:space:]]}} +  ; CHECK-NEXT: bb.4: +  ; CHECK-NEXT:   successors: {{.*$}} +  ; CHECK-SAME: {{$[[:space:]]}} +  ; CHECK-NEXT:   TEST8ri %[[B_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:   JNE_1 %bb.2, implicit killed $eflags +  ; CHECK-NEXT:   JMP_1 %bb.3 +   +  bb.1: +    %3:gr32 = MOV32ri64 42 +    $eax = COPY %3 +    RET 0, $eax +   +  bb.2: +    %4:gr32 = MOV32ri64 43 +    $eax = COPY %4 +    RET 0, $eax +   +  bb.3: +    %5:gr32 = MOV32r0 implicit-def dead $eflags +    $eax = COPY %5 +    RET 0, $eax + +... 
+--- +name:            test_branch_fallthrough +# CHECK-LABEL: name: test_branch_fallthrough +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    successors: %bb.1, %bb.2, %bb.3 +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    CMP64rr %0, %1, implicit-def $eflags +    %2:gr64 = COPY $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags +  ; CHECK:      %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags +  ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %2 +    JA_1 %bb.2, implicit $eflags +    JB_1 %bb.3, implicit $eflags +  ; CHECK-NOT: $eflags = +  ; +  ; CHECK:        TEST8ri %[[A_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:   JNE_1 %bb.2, implicit killed $eflags +  ; CHECK-SAME: {{$[[:space:]]}} +  ; CHECK-NEXT: bb.4: +  ; CHECK-NEXT:   successors: {{.*$}} +  ; CHECK-SAME: {{$[[:space:]]}} +  ; CHECK-NEXT:   TEST8ri %[[B_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:   JNE_1 %bb.3, implicit killed $eflags +  ; CHECK-SAME: {{$[[:space:]]}} +  ; CHECK-NEXT:   bb.1: + +  bb.1: +    %5:gr32 = MOV32r0 implicit-def dead $eflags +    $eax = COPY %5 +    RET 0, $eax +   +  bb.2: +    %3:gr32 = MOV32ri64 42 +    $eax = COPY %3 +    RET 0, $eax +   +  bb.3: +    %4:gr32 = MOV32ri64 43 +    $eax = COPY %4 +    RET 0, $eax +   +... 
+---
+name:            test_setcc
+# CHECK-LABEL: name: test_setcc
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+  - { reg: '$rsi', virtual-reg: '%1' }
+body:             |
+  bb.0:
+    liveins: $rdi, $rsi
+
+    %0:gr64 = COPY $rdi
+    %1:gr64 = COPY $rsi
+    CMP64rr %0, %1, implicit-def $eflags
+    %2:gr64 = COPY $eflags
+  ; CHECK-NOT:  COPY{{( killed)?}} $eflags
+  ; CHECK:      %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags
+  ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags
+  ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags
+  ; CHECK-NEXT: %[[NE_REG:[^:]*]]:gr8 = SETNEr implicit $eflags
+  ; CHECK-NOT:  COPY{{( killed)?}} $eflags
+
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+
+    $eflags = COPY %2
+    %3:gr8 = SETAr implicit $eflags
+    %4:gr8 = SETBr implicit $eflags
+    %5:gr8 = SETEr implicit $eflags
+    %6:gr8 = SETNEr implicit killed $eflags
+    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %3
+    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
+    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %5
+    MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %6
+  ; CHECK-NOT:     $eflags =
+  ; CHECK-NOT:             = SET{{.*}}
+  ; CHECK:         MOV8mr {{.*}}, killed %[[A_REG]]
+  ; CHECK-NEXT:    MOV8mr {{.*}}, killed %[[B_REG]]
+  ; CHECK-NEXT:    MOV8mr {{.*}}, killed %[[E_REG]]
+  ; CHECK-NEXT:    MOV8mr {{.*}}, killed %[[NE_REG]]
+
+    RET 0
+
+...
+--- +name:            test_cmov +# CHECK-LABEL: name: test_cmov +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    CMP64rr %0, %1, implicit-def $eflags +    %2:gr64 = COPY $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags +  ; CHECK:      %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags +  ; CHECK-NEXT: %[[B_REG:[^:]*]]:gr8 = SETBr implicit $eflags +  ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %2 +    %3:gr64 = CMOVA64rr %0, %1, implicit $eflags +    %4:gr64 = CMOVB64rr %0, %1, implicit $eflags +    %5:gr64 = CMOVE64rr %0, %1, implicit $eflags +    %6:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags +  ; CHECK-NOT:     $eflags = +  ; CHECK:         TEST8ri %[[A_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:    %3:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags +  ; CHECK-NEXT:    TEST8ri %[[B_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:    %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags +  ; CHECK-NEXT:    TEST8ri %[[E_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:    %5:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags +  ; CHECK-NEXT:    TEST8ri %[[E_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:    %6:gr64 = CMOVE64rr %0, %1, implicit killed $eflags +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %3 +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4 +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6 + +    RET 0 + +... 
+--- +name:            test_adc +# CHECK-LABEL: name: test_adc +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags +    %3:gr64 = COPY $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags +  ; CHECK:      %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %3 +    %4:gr64 = ADC64ri32 %2:gr64, 42, implicit-def $eflags, implicit $eflags +    %5:gr64 = ADC64ri32 %4:gr64, 42, implicit-def $eflags, implicit $eflags +  ; CHECK-NOT:     $eflags = +  ; CHECK:         dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags +  ; CHECK-NEXT:    %4:gr64 = ADC64ri32 %2, 42, implicit-def $eflags, implicit killed $eflags +  ; CHECK-NEXT:    %5:gr64 = ADC64ri32 %4, 42, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 + +    RET 0 + +... +--- +name:            test_sbb +# CHECK-LABEL: name: test_sbb +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    %2:gr64 = SUB64rr %0, %1, implicit-def $eflags +    %3:gr64 = COPY killed $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags +  ; CHECK:      %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %3 +    %4:gr64 = SBB64ri32 %2:gr64, 42, implicit-def $eflags, implicit killed $eflags +    %5:gr64 = SBB64ri32 %4:gr64, 42, implicit-def dead $eflags, implicit killed $eflags +  ; CHECK-NOT:     $eflags = +  ; CHECK:         dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags +  ; CHECK-NEXT:    %4:gr64 = SBB64ri32 %2, 42, implicit-def $eflags, implicit killed $eflags +  ; CHECK-NEXT:    %5:gr64 = SBB64ri32 %4, 42, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 + +    RET 0 + +... 
+--- +name:            test_adcx +# CHECK-LABEL: name: test_adcx +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags +    %3:gr64 = COPY $eflags +  ; CHECK-NOT:    COPY{{( killed)?}} $eflags +  ; CHECK:        %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags +  ; CHECK-NEXT:   %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags +  ; CHECK-NOT:    COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %3 +    %4:gr64 = CMOVE64rr %0, %1, implicit $eflags +    %5:gr64 = MOV64ri32 42 +    %6:gr64 = ADCX64rr %2, %5, implicit-def $eflags, implicit $eflags +  ; CHECK-NOT:     $eflags = +  ; CHECK:         TEST8ri %[[E_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:    %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags +  ; CHECK-NEXT:    %5:gr64 = MOV64ri32 42 +  ; CHECK-NEXT:    dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags +  ; CHECK-NEXT:    %6:gr64 = ADCX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4 +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6 + +    RET 0 + +... +--- +name:            test_adox +# CHECK-LABEL: name: test_adox +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags +    %3:gr64 = COPY $eflags +  ; CHECK-NOT:    COPY{{( killed)?}} $eflags +  ; CHECK:        %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags +  ; CHECK-NEXT:   %[[OF_REG:[^:]*]]:gr8 = SETOr implicit $eflags +  ; CHECK-NOT:    COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %3 +    %4:gr64 = CMOVE64rr %0, %1, implicit $eflags +    %5:gr64 = MOV64ri32 42 +    %6:gr64 = ADOX64rr %2, %5, implicit-def $eflags, implicit $eflags +  ; CHECK-NOT:     $eflags = +  ; CHECK:         TEST8ri %[[E_REG]], -1, implicit-def $eflags +  ; CHECK-NEXT:    %4:gr64 = CMOVNE64rr %0, %1, implicit killed $eflags +  ; CHECK-NEXT:    %5:gr64 = MOV64ri32 42 +  ; CHECK-NEXT:    dead %{{[^:]*}}:gr8 = ADD8ri %[[OF_REG]], 127, implicit-def $eflags +  ; CHECK-NEXT:    %6:gr64 = ADOX64rr %2, %5, implicit-def{{( dead)?}} $eflags, implicit killed $eflags +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %4 +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %6 + +    RET 0 + +... 
+--- +name:            test_rcl +# CHECK-LABEL: name: test_rcl +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags +    %3:gr64 = COPY $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags +  ; CHECK:      %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %3 +    %4:gr64 = RCL64r1 %2:gr64, implicit-def $eflags, implicit $eflags +    %5:gr64 = RCL64r1 %4:gr64, implicit-def $eflags, implicit $eflags +  ; CHECK-NOT:     $eflags = +  ; CHECK:         dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags +  ; CHECK-NEXT:    %4:gr64 = RCL64r1 %2, implicit-def $eflags, implicit killed $eflags +  ; CHECK-NEXT:    %5:gr64 = RCL64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 + +    RET 0 + +... +--- +name:            test_rcr +# CHECK-LABEL: name: test_rcr +liveins:          +  - { reg: '$rdi', virtual-reg: '%0' } +  - { reg: '$rsi', virtual-reg: '%1' } +body:             | +  bb.0: +    liveins: $rdi, $rsi +   +    %0:gr64 = COPY $rdi +    %1:gr64 = COPY $rsi +    %2:gr64 = ADD64rr %0, %1, implicit-def $eflags +    %3:gr64 = COPY $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags +  ; CHECK:      %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags +  ; CHECK-NOT:  COPY{{( killed)?}} $eflags + +    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +    CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax +    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + +    $eflags = COPY %3 +    %4:gr64 = RCR64r1 %2:gr64, implicit-def $eflags, implicit $eflags +    %5:gr64 = RCR64r1 %4:gr64, implicit-def $eflags, implicit $eflags +  ; CHECK-NOT:     $eflags = +  ; CHECK:         dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags +  ; CHECK-NEXT:    %4:gr64 = RCR64r1 %2, implicit-def $eflags, implicit killed $eflags +  ; CHECK-NEXT:    %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags +    MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5 + +    RET 0 + +... 
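The CHECK lines in the new flags-copy-lowering.mir test above, and the rewritten setcc/testb sequences in the .ll tests before it, all lean on the same arithmetic: each flag of interest is first captured into a byte register with a SETcc (so the byte is always 0 or 1), and each later user of the copied EFLAGS is then fed by either TEST8ri <reg>, -1 (ZF is set exactly when the saved byte is zero), ADD8ri <reg>, 255 (the 8-bit add carries exactly when the saved byte is non-zero, re-seeding CF), or ADD8ri <reg>, 127 (the signed 8-bit add overflows exactly when the saved byte is one, re-seeding OF). The stand-alone C++ sketch below is an illustration added alongside the patch, not part of it; the names are invented for the example, and it only demonstrates why that arithmetic reproduces the original flag values.

    // Illustration only (not part of the patch): check the flag-rebuilding
    // arithmetic that the CHECK lines above rely on, assuming the byte saved
    // by a SETcc is always 0 or 1.
    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned Flag = 0; Flag <= 1; ++Flag) {
        // TEST8ri <reg>, -1: ZF is set exactly when the saved byte is zero,
        // so JNE/CMOVNE on the saved byte recovers the original condition.
        bool ZF = (Flag & 0xFFu) == 0;
        assert(ZF == (Flag == 0));

        // ADD8ri <reg>, 255: the 8-bit add carries out exactly when the saved
        // byte is non-zero, which puts the original CF back into EFLAGS.
        bool CF = Flag + 255u > 0xFFu;
        assert(CF == (Flag != 0));

        // ADD8ri <reg>, 127: the signed 8-bit add overflows exactly when the
        // saved byte is one, which puts the original OF back into EFLAGS.
        int Sum = static_cast<int8_t>(Flag) + 127;
        bool OF = Sum > INT8_MAX || Sum < INT8_MIN;
        assert(OF == (Flag != 0));
      }
      return 0;
    }

This is why, for example, the test_adc checks above leave the ADC instructions themselves untouched and only require a single dead ADD8ri of the saved carry byte immediately before the first ADC.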
diff --git a/llvm/test/CodeGen/X86/mul-i1024.ll b/llvm/test/CodeGen/X86/mul-i1024.ll index 1f15818f8d4..8dd8a8476de 100644 --- a/llvm/test/CodeGen/X86/mul-i1024.ll +++ b/llvm/test/CodeGen/X86/mul-i1024.ll @@ -6,202 +6,195 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-LABEL: test_1024:  ; X32:       # %bb.0:  ; X32-NEXT:    pushl %ebp -; X32-NEXT:    movl %esp, %ebp  ; X32-NEXT:    pushl %ebx  ; X32-NEXT:    pushl %edi  ; X32-NEXT:    pushl %esi -; X32-NEXT:    subl $996, %esp # imm = 0x3E4 -; X32-NEXT:    movl 12(%ebp), %eax -; X32-NEXT:    movl 32(%eax), %eax +; X32-NEXT:    subl $1000, %esp # imm = 0x3E8 +; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl 48(%eax), %ecx +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl 32(%edx), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    xorl %ecx, %ecx -; X32-NEXT:    mull %ecx +; X32-NEXT:    xorl %edi, %edi +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl 8(%ebp), %esi -; X32-NEXT:    movl 48(%esi), %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    mull %ecx -; X32-NEXT:    xorl %ecx, %ecx +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    adcl %edi, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %ebx, %ecx +; X32-NEXT:    movl %edx, %eax +; X32-NEXT:    adcl %ebp, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl 32(%esi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    mull %ecx +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    addl %ebx, %ecx  ; X32-NEXT:    movl %edx, %eax -; X32-NEXT:    adcl %edi, %eax -; X32-NEXT:    movl %edi, %ecx -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebp, %eax +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 36(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    xorl %edx, %edx -; X32-NEXT:    mull %edx +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, %eax  ; X32-NEXT:    adcl $0, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl 36(%esi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    xorl %ecx, %ecx -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    mull 
%edi +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl %esi, %edx -; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    leal (%ebx,%edi), %eax -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    leal (%ecx,%edx), %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    leal (%ebx,%eax), %eax +; X32-NEXT:    leal (%ecx,%ebp), %edx  ; X32-NEXT:    adcl %eax, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    seto %al -; X32-NEXT:    lahf -; X32-NEXT:    movl %eax, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl %esi, %ebx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl (%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl 8(%ebp), %ecx -; X32-NEXT:    movl 16(%ecx), %eax +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT:    movl 16(%ebp), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    xorl %edx, %edx -; X32-NEXT:    mull %edx +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %esi, %ecx +; X32-NEXT:    adcl %ebx, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl (%ecx), %eax +; X32-NEXT:    movl (%ebp), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %ebp  ; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %edx, %eax -; X32-NEXT:    adcl %edi, %eax +; X32-NEXT:    adcl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  
; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edi, %eax +; X32-NEXT:    adcl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movl %esi, %ecx -; X32-NEXT:    adcl %edi, %ebx -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %ebx -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    adcl %ebx, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl 12(%ebp), %eax -; X32-NEXT:    movl 4(%eax), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi +; X32-NEXT:    movl 4(%esi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    xorl %edx, %edx -; X32-NEXT:    mull %edx -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ebx, %edi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    xorl %ecx, %ecx +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %ebx, %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    addl %ebp, %ecx +; X32-NEXT:    movl %ebp, %esi  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %esi -; X32-NEXT:    setb %bh -; X32-NEXT:    addl %eax, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movzbl %bh, %eax +; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    movl %ebx, %ebp +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    setb %cl +; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    movl %edi, (%esp) # 4-byte Spill +; X32-NEXT:    movzbl %cl, %eax  ; X32-NEXT:    adcl %edx, %eax -; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 8(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    xorl %ebx, %ebx -; X32-NEXT:    mull %ebx +; X32-NEXT:    xorl %ecx, %ecx +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edx, %eax -; X32-NEXT:    addl %esi, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 52(%eax), %eax -; X32-NEXT:    movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl %ebx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ecx, %esi -; X32-NEXT:    movl %ecx, %edi -; X32-NEXT:    setb %cl  ; X32-NEXT:    addl %eax, %esi -; X32-NEXT:    movzbl %cl, %eax -; X32-NEXT:    adcl %edx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 56(%eax), %eax +; X32-NEXT:    adcl %edx, %ebp +; X32-NEXT:    addl %edi, %esi +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %ebp +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT:    movl 52(%ebp), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl %edi, %ebx +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    addl %esi, %ebx +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    setb %bl +; X32-NEXT:    addl %eax, %ecx +; X32-NEXT:    movzbl %bl, %ebx +; X32-NEXT:    adcl %edx, %ebx +; X32-NEXT:    movl 56(%ebp), %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    xorl %edx, %edx +; X32-NEXT:    mull %edx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %ecx -; X32-NEXT:    addl %eax, %ebx +; X32-NEXT:    movl %esi, %ebp +; X32-NEXT:    addl %eax, %ebp  ; X32-NEXT:    adcl %edx, %edi -; X32-NEXT:    addl %esi, %ebx -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %edi  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edi, %eax @@ -210,10 +203,10 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 40(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx @@ -228,46 +221,42 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %ebx, %ecx  ; X32-NEXT:    addl %esi, %edi  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl %ebp, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    seto %al -; X32-NEXT:    lahf -; X32-NEXT:    movl %eax, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %ecx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx  ; X32-NEXT:    movl 16(%ecx), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ebx, %ebx  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl 20(%ecx), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %esi, %ebx +; X32-NEXT:    addl %ebp, %ebx  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    addl %edi, %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    adcl %ebp, %ecx  ; X32-NEXT:    setb %bl  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movzbl %bl, %esi  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 24(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %edx, %edx @@ -276,55 +265,54 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edi, %ebx  ; X32-NEXT:    addl %eax, %ebx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    adcl %edx, %eax  ; X32-NEXT:    addl %ecx, %ebx -; 
X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %eax -; X32-NEXT:    movl %eax, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, %eax +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT:    adcl %edx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edx, %eax -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %esi, %eax +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    movl %edx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %ecx, %eax +; X32-NEXT:    adcl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, %eax +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %esi, %eax +; X32-NEXT:    adcl %edx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %ebx, %eax +; X32-NEXT:    adcl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 20(%eax), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi +; X32-NEXT:    movl 20(%edi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx @@ -333,31 +321,30 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %ebx, %esi  ; X32-NEXT:    movl %edx, %ecx 
 ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %edi, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl %ebp, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %ebx, %ecx  ; X32-NEXT:    setb %bl  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movzbl %bl, %esi  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 24(%eax), %eax +; X32-NEXT:    movl 24(%edi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %edx, %edx  ; X32-NEXT:    mull %edx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %edx +; X32-NEXT:    movl %ebp, %edi  ; X32-NEXT:    addl %eax, %edi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    addl %ecx, %edi  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -367,8 +354,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 4(%eax), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT:    movl 4(%ecx), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx @@ -377,8 +364,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %ecx, %esi  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl %ebx, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl %ebp, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %ecx, %edi  ; X32-NEXT:    setb %cl @@ -386,8 +373,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movzbl %cl, %eax  ; X32-NEXT:    adcl %edx, %eax +; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 8(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx @@ -395,57 +383,54 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; 
X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %esi -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl %ebp, %esi +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %ecx  ; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl %ebx, %ecx  ; X32-NEXT:    movl %esi, %edx +; X32-NEXT:    movl %esi, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %edx  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    pushl %eax -; X32-NEXT:    seto %al -; X32-NEXT:    lahf -; X32-NEXT:    movl %eax, %edx -; X32-NEXT:    popl %eax -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl %eax, %edx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %ecx, %edx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %esi, %edx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, %eax -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %edx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) 
# 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    addl %edx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -458,82 +443,78 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    addl %edx, %eax -; X32-NEXT:    adcl %ebx, %esi +; X32-NEXT:    adcl %ebp, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %edx, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %edx, %ebx +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edi, %eax +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 40(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %edi -; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    addl %esi, %edi -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %edx +; X32-NEXT:    addl %eax, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl %ecx, %ebp +; 
X32-NEXT:    addl %edi, %edx +; X32-NEXT:    adcl %ebx, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edi, %eax -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %eax +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edx, %eax +; X32-NEXT:    adcl %ebp, %eax +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    seto %al -; X32-NEXT:    lahf -; X32-NEXT:    movl %eax, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edi, %eax +; X32-NEXT:    adcl %edx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edx, %eax +; X32-NEXT:    adcl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %esi +; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi  ; X32-NEXT:    movl 48(%esi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, %ebp  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl 52(%esi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -542,38 +523,37 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %edi, %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl %ebx, %esi +; X32-NEXT:    addl %ebp, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb %bl  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movzbl %bl, %esi  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 56(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %edx, %edx  ; X32-NEXT:    mull %edx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebp, %ebx  ; X32-NEXT:    addl %eax, %ebx -; X32-NEXT:    movl %edi, %edx -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl %edx, %edi  ; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %edi  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT:    movl %edx, %eax +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edx, %eax +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movl %edx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload @@ -584,67 +564,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 64(%eax), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi +; X32-NEXT:    movl 64(%edi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %ecx -; X32-NEXT:    movl %eax, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %ecx +; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    adcl %esi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    addl %edx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 80(%eax), %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    adcl %edx, %eax +; X32-NEXT:    movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl 80(%edi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %edx, %edx  ; X32-NEXT:    mull %edx +; X32-NEXT:    movl %ebp, %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    addl %eax, %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %esi, %edi  ; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %ecx +; X32-NEXT:    addl %eax, %esi +; X32-NEXT:    adcl %edx, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx  ; X32-NEXT:    movl 80(%ecx), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    xorl %ebx, %ebx -; X32-NEXT:    mull %ebx +; X32-NEXT:    xorl %edi, %edi +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %edx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    adcl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl 64(%ecx), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    mull %ebx +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    addl %esi, %ecx  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebx, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload @@ -654,15 +631,16 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, %eax -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload @@ 
-677,122 +655,116 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    pushl %eax -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    addb $127, %al -; X32-NEXT:    sahf -; X32-NEXT:    popl %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    adcl %edx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload +; X32-NEXT:    addb $255, %al +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    adcl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    adcl %edx, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    adcl %edx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    adcl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl %esi, %eax +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    adcl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %edx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    
addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %esi, %eax +; X32-NEXT:    adcl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 68(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %edi, %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl %ebp, %edi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl %ebx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    addl %ebx, %edi +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebp, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT:    adcl %edx, %edi -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT:    adcl %edx, %esi +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 72(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %edx, %edx  ; X32-NEXT:    mull %edx -; X32-NEXT:    movl %eax, %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl %edx, %ebx -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    adcl %edi, %ebx +; X32-NEXT:    movl %ebx, %edx +; X32-NEXT:    addl %eax, %ebx +; X32-NEXT:    adcl %edi, %ebp +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    adcl %esi, %ebp +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    addl %edx, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %edx, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 
4-byte Reload -; X32-NEXT:    adcl %eax, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl %ebx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl %ebx, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    adcl %ebp, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl %edx, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %ecx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx  ; X32-NEXT:    movl 84(%ecx), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx @@ -806,48 +778,46 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %edi, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %ebx, %ecx -; X32-NEXT:    setb %bl +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    movzbl %bl, %esi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload +; X32-NEXT:    adcl %edx, %ebp +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 88(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %edx, %edx  ; X32-NEXT:    mull %edx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %ebx -; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    movl %edi, %esi +; X32-NEXT:    addl %eax, %esi +; X32-NEXT:    adcl %edx, %ebx +; X32-NEXT:    addl %ecx, %esi +; X32-NEXT:    adcl %ebp, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %edx, %eax -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    adcl %esi, %eax -; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %esi, %eax +; 
X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    adcl %ebx, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    adcl %esi, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %ebx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    addl %edi, %esi +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 84(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx @@ -857,100 +827,96 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %esi, %edi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl %ebp, %edi  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb %bl  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movzbl %bl, %edi  ; X32-NEXT:    adcl %edx, %edi -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 88(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %edx, %edx  ; X32-NEXT:    mull %edx -; X32-NEXT:    movl %eax, %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %esi, %ebx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %ebp, %ebx +; X32-NEXT:    addl %eax, %ebx  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edi, %esi -; X32-NEXT:    movl %esi, %edx  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebp, %ecx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; 
X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %eax -; X32-NEXT:    movl 68(%eax), %eax +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT:    movl 68(%ecx), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx -; X32-NEXT:    xorl %ebx, %ebx  ; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %edi, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    addl %ebx, %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl %ebp, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebx, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 72(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    mull %ebx +; X32-NEXT:    xorl %edx, %edx +; X32-NEXT:    mull %edx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %ebx, %eax -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    adcl %esi, %eax -; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebp, %edx +; X32-NEXT:    addl %eax, %ebp +; X32-NEXT:    adcl %edi, %ebx +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl %esi, %ebx  ; X32-NEXT:    movl %edx, %eax  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %esi -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ecx, %esi -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    
movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebp, %ecx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebx, %ecx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %edx, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl %ebx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edi, %ebp +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl %ebp, %ebx +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    addl %edx, %eax @@ -961,9 +927,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %edx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 12(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx @@ -984,136 +950,131 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, %edx  ; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    adcl %esi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl (%esp), %edi # 4-byte Reload +; X32-NEXT:    addl %ebp, %edi +; X32-NEXT:    movl %edi, (%esp) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl %edi, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %ecx, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:   
 addl %edi, %esi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %esi, %edx -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl %ebx, %ecx +; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    addl %ebp, %esi +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %edx -; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl (%esp), %esi # 4-byte Reload  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    adcl $0, %eax -; X32-NEXT:    addl %edx, %ecx -; X32-NEXT:    adcl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT:    adcl $0, %edx +; X32-NEXT:    addl %ecx, %esi +; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    setb %dl -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %esi, (%esp) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movzbl %dl, %edx  ; X32-NEXT:    adcl %ebx, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    addl %ecx, %ebx -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl (%esp), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    adcl %ebx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl %eax, %edi  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 44(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx -; 
X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl %esi, %ecx +; X32-NEXT:    addl %esi, %ebx  ; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl %edi, %ebx +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %edx -; X32-NEXT:    movl %esi, %ecx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    addl %ebx, %edx -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 1-byte Folded Reload -; X32-NEXT:    adcl %edi, %ebx -; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    setb %bl  ; X32-NEXT:    addl %eax, %edx -; X32-NEXT:    adcl %ecx, %ebx +; X32-NEXT:    movzbl %bl, %eax +; X32-NEXT:    adcl %ecx, %eax +; X32-NEXT:    addl %edi, %edx +; X32-NEXT:    adcl %esi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    movl %edi, (%esp) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl %ecx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    movl %ecx, %edi  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %ebp, (%esp) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %eax -; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    adcl %edi, %ebx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Folded Reload -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 1-byte Folded Reload +; X32-NEXT:    adcl %edx, %ebp +; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    addl %eax, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    adcl %ebp, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    pushl %eax -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    addb $127, %al -; X32-NEXT:    sahf -; X32-NEXT:    popl %eax -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl (%esp), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1123,76 +1084,74 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax +; X32-NEXT:    movl 
{{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 12(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl $0, %edx +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %edi, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edx -; X32-NEXT:    setb %cl +; X32-NEXT:    addl %edi, %ebx +; X32-NEXT:    adcl $0, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    addl %esi, %ebx +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    setb %bl  ; X32-NEXT:    addl %eax, %edx -; X32-NEXT:    movzbl %cl, %eax -; X32-NEXT:    adcl %esi, %eax -; X32-NEXT:    movl %edi, %esi -; X32-NEXT:    addl %edi, %edx -; X32-NEXT:    adcl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl %edi, %ecx +; X32-NEXT:    movzbl %bl, %ebp +; X32-NEXT:    adcl %ecx, %ebp +; X32-NEXT:    movl %esi, %ecx +; X32-NEXT:    addl %esi, %edx +; X32-NEXT:    adcl %edi, %ebp +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    addl %ecx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl %eax, %esi +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %ecx, %edi +; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    movl %ebp, %esi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebx, %edi +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    adcl $0, %eax -; X32-NEXT:    addl %esi, %ecx -; X32-NEXT:    adcl %ebx, %eax -; X32-NEXT:    setb %bl -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movzbl %bl, %esi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 
4-byte Reload +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    adcl %esi, %edi +; X32-NEXT:    setb %cl +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    adcl %edx, %ecx +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl %edi, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    addl %eax, %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl %ebp, %edi  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, %eax -; X32-NEXT:    addb $127, %al -; X32-NEXT:    sahf +; X32-NEXT:    addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1203,59 +1162,59 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 44(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    addl %esi, %ecx +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %edi, %ebx -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl %ecx, %ebx -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi -; X32-NEXT:    setb %bl -; X32-NEXT:    addl %eax, %esi -; X32-NEXT:    movzbl %bl, %eax +; X32-NEXT:    addl %edi, %ecx +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %esi, %ebp +; X32-NEXT:    setb %cl +; X32-NEXT:    addl %eax, %ebp +; X32-NEXT:    movzbl %cl, %eax  ; X32-NEXT:    adcl %edx, 
%eax -; X32-NEXT:    movl %ecx, %edx -; X32-NEXT:    addl %ecx, %esi -; X32-NEXT:    adcl %edi, %eax +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    adcl %esi, %eax +; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %esi, %edx +; X32-NEXT:    movl %ebp, %edx  ; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebx, %edi +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %eax  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    adcl %ebx, %ecx +; X32-NEXT:    addl %edx, %eax +; X32-NEXT:    adcl %edi, %ecx  ; X32-NEXT:    setb %dl  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movzbl %dl, %edx -; X32-NEXT:    adcl %esi, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl %edi, %ebx +; X32-NEXT:    movzbl %dl, %eax +; X32-NEXT:    adcl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl $0, %ebp +; X32-NEXT:    movl %esi, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -1263,67 +1222,60 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ebx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 
4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    adcl %eax, %edi +; X32-NEXT:    adcl %ebp, %esi +; X32-NEXT:    addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, %eax -; X32-NEXT:    addb $127, %al -; X32-NEXT:    sahf -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, %eax -; X32-NEXT:    addb $127, %al -; X32-NEXT:    sahf -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addb $255, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 60(%eax), %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl 60(%eax), %esi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl %ebp, %ebx  ; X32-NEXT:    adcl $0, %edi  ; 
X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -1332,64 +1284,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %ecx, %ebx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebx, %edi  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    movzbl %bl, %eax +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi -; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    adcl %esi, %ebx +; X32-NEXT:    setb (%esp) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -1397,38 +1349,38 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl 
%edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ebp, %edi  ; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT:    setb (%esp) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %ebp  ; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload +; X32-NEXT:    movzbl (%esp), %edi # 1-byte Folded Reload  ; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -1451,193 +1403,192 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    addl %ecx, %esi -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %esi, %ebp +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    setb 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %edi, %ebx -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    adcl %edi, %esi  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %esi, %ebp +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Folded Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %edi, %ecx -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %esi -; X32-NEXT:    setb %cl +; X32-NEXT:    adcl %esi, %edi +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    addl %eax, %ebx  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    setb (%esp) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx 
# 4-byte Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload  ; X32-NEXT:    adcl %esi, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ebx -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %ebp +; X32-NEXT:    adcl %edx, %edi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %edi, %edx +; X32-NEXT:    addl %ebx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl %eax, %ebx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    movzbl (%esp), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %ebp +; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %esi, %ecx +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill +; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    setb %cl +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -1647,35 +1598,37 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte 
Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull %edi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -1685,13 +1638,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    adcl $0, %ebp +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 28(%eax), %ebx  ; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ebx @@ -1699,118 +1652,117 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ebx +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill +; X32-NEXT:    adcl %ebp, %ebx  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; 
X32-NEXT:    addl %eax, %ebx +; X32-NEXT:    addl %eax, %ebp  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl (%esp), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT:    movzbl %bl, %esi  ; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %ebx, %edx +; X32-NEXT:    addl %ebp, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl 
%ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, (%esp) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:   
 movl %eax, %esi +; X32-NEXT:    addl %ecx, %esi +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ebx +; X32-NEXT:    adcl %ebx, %ecx +; X32-NEXT:    setb %bl +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %edi  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -1820,81 +1772,81 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %esi, %ebp  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ebx +; X32-NEXT:    addl %eax, %ebp  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1905,42 +1857,40 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload  ; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %ebx, %edx +; X32-NEXT:    addl %ebp, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload @@ -1950,8 +1900,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl %eax, %ebx -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %eax, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl $0, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -1965,41 +1914,42 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) 
nounwind {  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 28(%eax), %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl 28(%eax), %ecx +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl %esi, %ebx  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -2010,62 +1960,63 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; 
X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %esi, %ebx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi -; X32-NEXT:    setb %cl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %edi, %eax -; 
X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    adcl %esi, %ebx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -2073,36 +2024,36 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ebp, %edi  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload  ; X32-NEXT:    adcl %edi, %edx @@ -2127,31 +2078,31 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 
4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    addl %ecx, %esi -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %esi, %ebx +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    adcl %edi, %esi  ; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl %bl, %esi +; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -2160,160 +2111,158 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %ecx, %ebx -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    adcl %edi, %esi  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %esi, %ebx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %edi, %ecx -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %esi -; X32-NEXT:    setb %cl +; X32-NEXT:    adcl %esi, %edi +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %edi 
-; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %ebp +; X32-NEXT:    adcl %edx, %edi +; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT:    movzbl %cl, %esi  ; X32-NEXT:    adcl %esi, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ebx -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %esi +; X32-NEXT:    adcl %edx, %ebx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl %ebp, %ecx  ; X32-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %edi, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl %ecx, %esi +; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl %eax, %ebx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    adcl %eax, %esi +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %esi, %ecx +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp +; 
X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    setb %cl +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %ebp +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -2323,62 +2272,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull %edi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl %ebp, %esi +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; 
X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %ebx, %ecx +; X32-NEXT:    addl %ebp, %ecx  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ebx +; X32-NEXT:    adcl %esi, %ebp  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -2387,8 +2338,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ebx  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %ebx @@ -2396,24 +2346,25 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte 
Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi @@ -2422,43 +2373,41 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    addl %ebx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    movl %ebx, %eax @@ -2466,21 +2415,22 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    addl %ecx, %esi +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    adcl %ebp, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -2499,167 +2449,162 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %edi, %esi +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 
4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %ebx, %ecx +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %esi, %ebx  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ebx -; X32-NEXT:    adcl %edx, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl %edx, %ecx +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax 
# 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload  ; X32-NEXT:    adcl %esi, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edx, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    adcl %edx, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    addl %ebx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, %edi +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl %eax, %ebx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl (%esp), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -2668,29 +2613,30 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    addl %ebp, %ebx  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -2701,81 +2647,82 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    adcl %ebx, %ecx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %edi, %ebp  ; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ebx -; 
X32-NEXT:    setb %cl +; X32-NEXT:    adcl %esi, %edi +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ebx +; X32-NEXT:    addl %eax, %ebp  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl (%esp), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    setb (%esp) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -2786,67 +2733,66 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload  ; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    
movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %ebx, %edx +; X32-NEXT:    addl %ebp, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    movzbl (%esp), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %esi, %ebx +; X32-NEXT:    addl (%esp), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -2854,65 +2800,65 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl 
%edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %esi, (%esp) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    setb %cl +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    movzbl %cl, %eax +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-NEXT:    adcl %edi, %ebx -; X32-NEXT:    setb %cl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    adcl %esi, %ebx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -2920,38 +2866,38 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %esi  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl (%esp), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), 
%eax # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi  ; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -2959,7 +2905,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %edi  ; X32-NEXT:    adcl %edx, %ebx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT:    movl (%esp), %edx # 4-byte Reload  ; X32-NEXT:    addl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi @@ -2971,7 +2917,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -2982,30 +2928,31 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %edi, %ecx  ; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %ecx -; X32-NEXT:    setb %bl  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    setb %cl +; 
X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -3015,106 +2962,107 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %esi, %ebx +; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %esi, %edi +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull 
%ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %esi, %ecx -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl 12(%ebp), %eax -; X32-NEXT:    movl 60(%eax), %esi  ; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %ebp, %ecx +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl 60(%eax), %ebp +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    adcl %edi, %esi  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    mull %ebp +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %ebp +; X32-NEXT:    adcl %edx, %ecx +; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %esi  ; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -3122,17 +3070,17 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %eax, %edi  ; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    addl %ebp, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl (%esp), %eax # 4-byte Reload  ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -3141,13 +3089,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload @@ -3155,28 +3103,29 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %esi, %ecx +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    addl %ecx, %esi  ; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi -; X32-NEXT:    setb %cl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -3192,7 +3141,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %edi @@ -3206,107 +3156,104 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:   
 addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl (%esp), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %esi, %ebp  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ebx -; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %edx, %ebp +; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl (%esp), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill  
; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    addl (%esp), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload  ; X32-NEXT:    adcl %esi, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    setb (%esp) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload +; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %esi +; X32-NEXT:    adcl %edx, %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl %ebx, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %ebx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl %ecx, %esi +; X32-NEXT:    adcl %ebp, %edx  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    adcl %eax, %esi +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, %ebx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, (%esp) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload @@ -3318,97 +3265,91 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebp +; X32-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl (%esp), %eax # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; 
X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebp +; X32-NEXT:    movl (%esp), %ebx # 4-byte Reload  ; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %eax -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebx, (%esp) # 4-byte Spill +; 
X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl %ebp, %ebx  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -3419,64 +3360,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    
adcl %ebx, %ecx  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    movzbl %bl, %eax +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %esi, %ecx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ebx -; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %esi, %edi +; X32-NEXT:    setb %bl +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload @@ -3484,8 +3425,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ebx  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %ebx @@ -3493,7 +3433,8 @@ 
define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -3504,133 +3445,133 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload  ; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    addl %ebx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl 
%ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl %ebp, %ebx  ; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %esi +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    adcl %ebx, %ecx +; X32-NEXT: 
   setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %edi, %ebp  ; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ebx -; X32-NEXT:    setb %cl +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    adcl %esi, %edi +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -3638,16 +3579,16 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -3658,18 +3599,18 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi  ; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -3700,29 +3641,30 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %esi, %ecx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    addl %edi, %ecx +; X32-NEXT:    adcl $0, %ebp +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    addl 
%ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %ebp, %edi  ; X32-NEXT:    setb %cl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -3735,61 +3677,61 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    adcl %edi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %esi, %eax +; 
X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %edi +; X32-NEXT:    adcl %edi, %esi  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -3798,8 +3740,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ebx  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %ebx @@ -3807,24 +3748,25 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi @@ -3833,43 +3775,41 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    
adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    addl %ebx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; 
X32-NEXT:    movl %ebx, %eax @@ -3877,21 +3817,22 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, %esi  ; X32-NEXT:    addl %ecx, %esi -; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebp, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -3910,56 +3851,57 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    
mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %esi +; X32-NEXT:    adcl %esi, %ebx  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -3968,7 +3910,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %esi  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %esi @@ -3978,23 +3920,24 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl %ecx, %edi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -4010,15 +3953,15 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; 
X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %esi  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %ebp  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -4032,23 +3975,21 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -4062,47 +4003,49 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl (%esp), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Folded Spill  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl $0, %edx +; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl $0, %ebp +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -4113,28 +4056,28 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %ecx, %ebx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl 
%eax, %ebp +; X32-NEXT:    addl %esi, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebx, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl %ecx, %ebx @@ -4147,70 +4090,72 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 76(%eax), %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl 76(%eax), %ecx +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %ecx, %edi +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %edi -; X32-NEXT:    setb %cl +; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    setb (%esp) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    mull %edi +; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:  
  adcl %edx, %esi +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %ebp +; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl (%esp), %ebx # 4-byte Folded Reload +; X32-NEXT:    setb (%esp) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %esi  ; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    movzbl (%esp), %esi # 1-byte Folded Reload +; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -4218,245 +4163,245 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %eax, %edi  ; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    addl %ebp, %edx +; X32-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ecx, (%esp) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    
mull %esi +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebx, %edi  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    movzbl %bl, %eax +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi -; X32-NEXT:    setb %cl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi  ; X32-NEXT:    
addl %edi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    adcl %esi, %ebx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    addl %eax, %esi  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl (%esp), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte 
Folded Reload -; X32-NEXT:    adcl %esi, %edx +; X32-NEXT:    mull %edi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ebx -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %edi, %edx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    adcl %edx, %ebx +; X32-NEXT:    movl (%esp), %edx # 4-byte Reload +; X32-NEXT:    addl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl %eax, %ebx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl $0, %eax +; X32-NEXT:    adcl %eax, %edi +; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, (%esp) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl 
$0, %esi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %edi, %ecx +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    setb %cl +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %ebp, %esi +; X32-NEXT:    mull %ebp +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %edx, %edi +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    adcl %edi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded 
Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -4467,67 +4412,66 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl 8(%ebp), %eax -; X32-NEXT:    movl 92(%eax), %ebx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl 92(%eax), %ebp  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %ebx, %esi -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ebx +; X32-NEXT:    adcl %edi, %esi  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    mull %ebp +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %ebp +; X32-NEXT:    adcl %edx, %ecx +; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    
adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %esi  ; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -4535,17 +4479,17 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %eax, %edi  ; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    addl %ebp, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl (%esp), %eax # 4-byte Reload  ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded 
Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill @@ -4554,42 +4498,43 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    addl %ecx, %esi +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %edi, %ecx -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    setb (%esp) # 1-byte Folded Spill +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %ebp  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %esi -; X32-NEXT:    setb %cl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    movzbl (%esp), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -4599,63 +4544,63 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %edi, %ecx -; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %ebx, %ecx +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %esi +; X32-NEXT:    adcl %esi, %ebp  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edi, %ebx -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -4664,8 +4609,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %esi  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %esi @@ -4681,7 +4625,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %edi @@ -4694,7 +4639,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %eax, %edi  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -4707,56 +4652,59 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, %ebp +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %esi  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %eax, (%esp) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl $0, %ebp +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -4765,41 +4713,43 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    
movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebx, %edi  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %esi, %ebp +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %esi -; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    adcl %edx, %esi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -4808,7 +4758,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %edi @@ -4822,7 +4772,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %eax, %edi  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %ebx  ; 
X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -4844,31 +4794,32 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    setb %bl +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %esi, %ebp  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -4886,15 +4837,14 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %edi, %ecx -; X32-NEXT:    imull %eax, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    imull %eax, %ebp +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %ecx, %edx -; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %ebp, %edx +; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl %edx, %ecx +; X32-NEXT:    movl %ecx, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  
; X32-NEXT:    movl %eax, %esi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -4908,7 +4858,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %edx, %esi  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    adcl %ebp, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl %edi, %esi @@ -4918,20 +4868,20 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl %ecx, %ebx -; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    adcl %ebp, %esi  ; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    mull %edi  ; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx @@ -4939,9 +4889,10 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    imull %eax, %esi +; X32-NEXT:    imull %ebp, %esi +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -4950,62 +4901,62 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %edx, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    imull %ebx, %esi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    imull %edi, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %esi, %edx +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    addl %ebx, %edx  ; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    imull %eax, %ecx  ; X32-NEXT:    addl %edx, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded 
Reload +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    addl %ecx, %esi -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %esi, %ecx -; X32-NEXT:    adcl %edi, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ebp, %edi +; X32-NEXT:    adcl %ebx, %esi +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT:    adcl %esi, %edx +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %esi -; X32-NEXT:    movl 104(%esi), %ebx -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi +; 
X32-NEXT:    movl 104(%esi), %ebp +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -5014,274 +4965,274 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ebx +; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ebx, %edi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    xorl %ecx, %ecx  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %ecx -; X32-NEXT:    movl 96(%ecx), %edi -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi +; X32-NEXT:    movl 96(%edi), %ebx +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 100(%ecx), %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl 100(%edi), %edi  ; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    mull %esi +; X32-NEXT:   
 movl %edx, %esi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %edi, %ecx +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl %eax, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    xorl %edx, %edx -; X32-NEXT:    mull %edx +; X32-NEXT:    adcl %eax, %ebx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    xorl %ecx, %ecx +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %edx, %eax -; X32-NEXT:    addl %ecx, %edi -; X32-NEXT:    adcl %esi, %eax -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %edi, %ecx +; X32-NEXT:    adcl %ebx, %eax +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte 
Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ebx -; X32-NEXT:    setb %cl +; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %esi +; X32-NEXT:    addl %eax, %ebp  ; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    addl %ebp, %ebx +; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    adcl %edi, %ecx  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ebp  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx -; 
X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    addl %eax, %esi  ; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload -; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %eax, %esi +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 8(%ebp), %ecx -; X32-NEXT:    movl 112(%ecx), %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    imull %eax, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi +; X32-NEXT:    movl 112(%esi), %edi +; X32-NEXT:    imull %edi, %ebp +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %esi, %edx -; X32-NEXT:    movl 116(%ecx), %eax +; X32-NEXT:    addl %ebp, %edx +; X32-NEXT:    movl 116(%esi), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    imull %eax, %edi -; X32-NEXT:    addl %edx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 120(%ecx), %eax +; X32-NEXT:    imull %eax, %ecx +; X32-NEXT:    addl %edx, %ecx  ; X32-NEXT:    movl %ecx, %ebx -; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    movl 120(%esi), %eax +; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    imull %esi, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    imull %esi, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp +; X32-NEXT:    addl %ecx, %edx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT:    movl 124(%ecx), %ecx +; X32-NEXT:    imull %ebp, %ecx +; X32-NEXT:    addl %edx, %ecx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %edi, %edx -; X32-NEXT:    movl 124(%ebx), %ebx -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    imull %ecx, %ebx -; X32-NEXT:    addl %edx, %ebx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %ecx +; X32-NEXT:  
  movl %ebp, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ebp, %ebx +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ecx, %esi -; X32-NEXT:    setb %cl +; X32-NEXT:    adcl %esi, %ebp +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movzbl %cl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    mull %edi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %esi +; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edx +; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    imull %eax, %edi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    imull %eax, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl %edi, %edx -; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %edx, %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %esi, %edx +; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    addl %edx, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    imull %ebx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    addl %ecx, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    imull %ebp, %esi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    imull %edi, %ecx +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %esi, %edx +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    imull %eax, %ecx  ; X32-NEXT:    addl %edx, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl %edi, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %esi, %ebx -; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    addl %ebx, %esi +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %ebx, %ecx -; X32-NEXT:    adcl %edi, %esi -; X32-NEXT:    setb %bl +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %esi, %edi +; X32-NEXT:    adcl %ebp, %ebx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    addl %esi, %eax -; X32-NEXT:    movzbl %bl, %esi +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload  ; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, %ecx  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload @@ -5292,7 +5243,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload @@ -5305,10 +5256,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    adcl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    addl (%esp), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -5320,37 +5270,38 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %eax -; X32-NEXT:    movl 92(%eax), %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl 92(%eax), %esi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %edi, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -5361,62 +5312,62 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    
movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebx, %edi  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %edi, %ebp +; X32-NEXT:    movzbl %bl, %eax +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi -; X32-NEXT:    setb %cl  ; X32-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    adcl %esi, %ebx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -5424,36 +5375,36 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %ecx  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ebp, %edi  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    
mull %edi +; X32-NEXT:    mull %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload  ; X32-NEXT:    adcl %edi, %edx @@ -5478,32 +5429,32 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %eax +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax  ; X32-NEXT:    movl 76(%eax), %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %esi, %ebp  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb %bl +; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    mull %ebx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -5513,157 +5464,158 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; 
X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %esi, %ebp +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi -; X32-NEXT:    setb %cl +; X32-NEXT:    adcl %esi, %edi +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    mull %ecx  ; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 
4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %esi +; X32-NEXT:    addl %eax, %ebx  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ebp +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    mull %esi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload +; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %edi -; X32-NEXT:    adcl %edx, %ebx +; X32-NEXT:    adcl %edx, %ebp  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %esi, %edx +; X32-NEXT:    addl %ebx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi -; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %esi, %ecx  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X32-NEXT:    adcl %edi, %esi +; X32-NEXT:    setb %cl +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload @@ -5673,104 +5625,105 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %esi, %ebx -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl %ebp, %esi +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    adcl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %edi, %ecx -; X32-NEXT:    adcl $0, %ebx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebx, %ecx +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    adcl %esi, %ebx  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    mull %ebp +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %ecx -; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %ebp +; X32-NEXT:    adcl %edx, %ecx +; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl $0, %ebp  ; X32-NEXT:    adcl $0, %ecx -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %eax, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl $0, %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload -; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    mull %esi +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl %bl, %esi +; X32-NEXT:    adcl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -5778,14 +5731,14 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %eax, %edi  ; X32-NEXT:    adcl %edx, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    addl %ecx, %edx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    addl %ebp, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edi  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -5797,42 +5750,43 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %edx -; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx 
# 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, %esi  ; X32-NEXT:    addl %ecx, %esi -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    addl %esi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ebx +; X32-NEXT:    adcl %ebx, %ecx +; X32-NEXT:    setb %bl +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    mull %edi  ; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X32-NEXT:    movzbl %bl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -5842,61 +5796,64 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %ecx, %edi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl %edi, %esi +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl 
%eax, %ebx +; X32-NEXT:    addl %ecx, %ebx  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %ebx  ; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %ebp, %ecx +; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %ebx, %esi +; X32-NEXT:    adcl %esi, %ebp  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    addl %esi, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -5905,7 +5862,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    addl %eax, %esi  ; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    addl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, %esi @@ -5915,28 +5872,29 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; 
X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl %ecx, %edi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, %edi  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx @@ -5973,74 +5931,77 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %ebx -; X32-NEXT:    movl 96(%ebx), %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl 100(%ebx), %ebx -; X32-NEXT:    movl %esi, %eax -; X32-NEXT:    mull %ebx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT:    movl 96(%ecx), %ebx  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %esi, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; X32-NEXT:    movl 100(%eax), %esi +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %esi, %ecx +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl %ebx, %esi +; X32-NEXT:    setb %bl +; X32-NEXT:    
movl %edi, %eax +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %esi, %ebx -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %esi, %edi +; X32-NEXT:    movzbl %bl, %eax  ; X32-NEXT:    adcl %eax, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    xorl %edx, %edx  ; X32-NEXT:    mull %edx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    addl %eax, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl %edx, %esi -; X32-NEXT:    addl %ebx, %edi -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl %edi, %ebx +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %ecx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %ebx, %ecx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ecx, %edi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %edi -; X32-NEXT:    setb %cl +; X32-NEXT:    adcl %esi, %ecx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ebx -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl %cl, %ecx +; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload @@ -6054,37 +6015,35 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl 12(%ebp), %eax -; X32-NEXT:    movl 104(%eax), %ecx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp +; X32-NEXT:    movl 104(%ebp), %ecx  ; X32-NEXT:    movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %esi, %eax  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, %ecx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl 12(%ebp), %eax -; X32-NEXT:    movl 108(%eax), %edx -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    mull %edx -; X32-NEXT:    movl %edx, %esi +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl 108(%ebp), %esi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %edi  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %esi -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    adcl %ebx, %edi +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ebx +; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl %esi, %edi -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    addl %edi, %esi +; X32-NEXT:    movzbl %bl, %eax  ; X32-NEXT:    adcl %eax, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    movl %ebx, %eax @@ -6092,20 +6051,20 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %edx  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl %eax, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl %eax, %edi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl %edx, %eax -; X32-NEXT:    addl %edi, %esi +; X32-NEXT:    addl %esi, %edi  ; X32-NEXT:    adcl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl $0, %esi +; X32-NEXT:    adcl $0, %edi  ; X32-NEXT:    adcl $0, %eax -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill @@ -6113,24 +6072,24 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl 
%ecx, %eax  ; X32-NEXT:    movl %ebx, %esi  ; X32-NEXT:    mull %ebx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ebx, %ebp  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movl %eax, %ebp  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %edi, %eax  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -6143,8 +6102,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -6159,51 +6118,50 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %ecx, %edx  ; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %edx, %esi -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    imull %ebx, %esi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    imull %edi, %esi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %eax, %ebp  ; X32-NEXT:    addl %esi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    imull %edi, %esi +; X32-NEXT:    imull %ecx, %esi  ; X32-NEXT:    addl %edx, %esi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl %ebx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 
X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %ecx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %ecx, %ebx +; X32-NEXT:    movl %eax, %edi +; X32-NEXT:    addl %ebx, %edi  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    addl %edi, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %ecx -; X32-NEXT:    setb %bl +; X32-NEXT:    adcl %esi, %ebx +; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 12(%ebp), %edx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx  ; X32-NEXT:    movl 124(%edx), %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    imull %eax, %ecx @@ -6214,84 +6172,86 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    addl %ecx, %edx  ; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    addl %edx, %esi -; X32-NEXT:    movl 112(%edi), %ebx -; X32-NEXT:    movl 116(%edi), %ecx -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl 112(%edi), %ebp +; X32-NEXT:    movl 116(%edi), %ebx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    imull %ecx, %edi -; X32-NEXT:    mull %ebx +; X32-NEXT:    imull %ebx, %edi +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    mull %ebp  ; X32-NEXT:    addl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    imull %ebx, %ecx +; X32-NEXT:    imull %ebp, %ecx  ; X32-NEXT:    addl %edx, %ecx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %esi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %esi, %ebx  ; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %esi, %ecx -; X32-NEXT:    adcl %edi, %ebx -; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ebx, %ebp +; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    setb %bl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload -; X32-NEXT:    addl %ebx, %eax -; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload -; X32-NEXT:    adcl %esi, %edx +; X32-NEXT:    mull %esi +; X32-NEXT:    addl %ecx, %eax +; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi -; 
X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %edi, %ebx +; X32-NEXT:    adcl $0, %ecx +; X32-NEXT:    movl %ebp, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    mull %edi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ecx, %ebp  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %edi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %bl, %edi +; X32-NEXT:    adcl %edi, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -6300,41 +6260,43 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    adcl $0, %edi -; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx -; X32-NEXT:    addl %ebx, %eax +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ebx +; X32-NEXT:    movl %edx, %ebx +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp +; X32-NEXT:    adcl $0, %ebx +; X32-NEXT:    movl %edi, %eax +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %edi, %ecx +; X32-NEXT:    adcl %ebx, %edi  ; X32-NEXT:    setb %bl -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movzbl %bl, %ecx -; X32-NEXT:    adcl %ecx, %edx +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    movl %esi, %ebp +; X32-NEXT:    mull %ecx +; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movzbl %bl, 
%edi +; X32-NEXT:    adcl %edi, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    addl %eax, %esi -; X32-NEXT:    adcl %edx, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    addl %eax, %edi +; X32-NEXT:    adcl %edx, %esi +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload @@ -6343,7 +6305,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %edi @@ -6357,7 +6319,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %eax, %edi  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload @@ -6383,25 +6345,25 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %ecx, %eax  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    movl %ebx, %eax  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %edi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    addl %ebp, %edi  ; X32-NEXT:    adcl $0, %esi  ; X32-NEXT:    movl %ecx, %eax -; X32-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    mull %ebp  ; X32-NEXT:    movl %edx, %ecx  ; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, %edi  ; X32-NEXT:    adcl %esi, %ecx  ; X32-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill  ; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi +; X32-NEXT: 
   mull %ebp  ; X32-NEXT:    addl %ecx, %eax  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload  ; X32-NEXT:    adcl %ecx, %edx @@ -6413,61 +6375,62 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    adcl %edx, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload  ; X32-NEXT:    adcl %eax, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl $0, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %edi, %ecx +; X32-NEXT:    movl %ebp, %ecx  ; X32-NEXT:    imull %eax, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    mull %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    addl %ecx, %edx -; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    imull %ebp, %esi  ; X32-NEXT:    addl %edx, %esi  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, %esi +; X32-NEXT:    movl %eax, %edi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    imull %ebx, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    mull %edi -; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %esi, %edx +; X32-NEXT:    imull %ebx, %edi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    imull %edi, %esi -; X32-NEXT:    addl %edx, %esi +; X32-NEXT:    mull %esi +; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    addl %edi, %edx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    imull %esi, %edi +; X32-NEXT:    addl %edx, %edi  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ecx +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    movl %eax, %ebx -; X32-NEXT:    addl %ecx, %ebx -; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %edi +; 
X32-NEXT:    movl %eax, %ebx +; X32-NEXT:    addl %esi, %ebx +; X32-NEXT:    adcl $0, %edi +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    mull %ebp +; X32-NEXT:    movl %ebp, %esi +; X32-NEXT:    movl %edx, %ebp  ; X32-NEXT:    addl %ebx, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl %esi, %edi -; X32-NEXT:    setb %bl +; X32-NEXT:    adcl %edi, %ebp +; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    mull %ecx -; X32-NEXT:    addl %edi, %eax -; X32-NEXT:    movzbl %bl, %ecx +; X32-NEXT:    mull %esi +; X32-NEXT:    addl %ebp, %eax +; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %edx  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -6476,146 +6439,139 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    imull %eax, %edi -; X32-NEXT:    movl %eax, %esi  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    mull %ecx -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, %ebp  ; X32-NEXT:    addl %edi, %edx -; X32-NEXT:    imull {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    imull %ebx, %ecx  ; X32-NEXT:    addl %edx, %ecx  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    movl %eax, %ecx +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    imull %esi, %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    imull %edi, %ecx -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    mull %ebx +; X32-NEXT:    mull %edi  ; X32-NEXT:    addl %ecx, %edx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    imull %ebx, %ecx +; X32-NEXT:    imull %edi, %ecx  ; X32-NEXT:    addl %edx, %ecx -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    addl %ebp, %eax  ; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %ebx, %eax -; X32-NEXT:    mull %esi -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl %edi, %eax -; X32-NEXT:    mull %esi +; X32-NEXT:    movl %edi, %ebp +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    mull %ecx +; X32-NEXT:    movl %edx, %edi +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %esi, %eax +; X32-NEXT:    mull %ecx  ; X32-NEXT:    movl %edx, %esi  ; X32-NEXT:    movl %eax, %ecx -; X32-NEXT:    addl %ebx, %ecx +; X32-NEXT:    addl %edi, %ecx  ; X32-NEXT:    adcl $0, %esi -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    movl %ebp, %eax  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %edi -; X32-NEXT:    addl %ecx, %eax -; X32-NEXT:    movl 
%eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl %eax, %ebp +; X32-NEXT:    addl %ecx, %ebp  ; X32-NEXT:    adcl %esi, %edi  ; X32-NEXT:    setb %cl  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    mull %ebx  ; X32-NEXT:    movl %edx, %esi -; X32-NEXT:    addl %edi, %eax +; X32-NEXT:    movl %eax, %edx +; X32-NEXT:    addl %edi, %edx  ; X32-NEXT:    movzbl %cl, %ecx  ; X32-NEXT:    adcl %ecx, %esi -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, %ebx -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl %eax, %edx +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, %ebp +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill  ; 
X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload @@ -6625,18 +6581,18 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; X32-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -6649,13 +6605,10 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload  ; X32-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload  ; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; X32-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT:    movl 16(%ebp), %ecx +; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl %esi, (%ecx)  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload @@ -6688,36 +6641,34 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind {  ; X32-NEXT:    movl %esi, 56(%ecx)  ; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload  ; X32-NEXT:    movl %esi, 60(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 64(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 68(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 72(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl 
%eax, 76(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 80(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 84(%ecx) -; X32-NEXT:    movl %ebx, 88(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 92(%ecx) -; X32-NEXT:    movl %edi, 96(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 100(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 104(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 108(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 112(%ecx) -; X32-NEXT:    movl %edx, 116(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT:    movl %eax, 120(%ecx) -; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 64(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 68(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 72(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 76(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 80(%ecx) +; X32-NEXT:    movl %ebp, 84(%ecx) +; X32-NEXT:    movl %edi, 88(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 92(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 96(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 100(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 104(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 108(%ecx) +; X32-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT:    movl %esi, 112(%ecx) +; X32-NEXT:    movl %ebx, 116(%ecx) +; X32-NEXT:    movl %edx, 120(%ecx)  ; X32-NEXT:    movl %eax, 124(%ecx) -; X32-NEXT:    addl $996, %esp # imm = 0x3E4 +; X32-NEXT:    addl $1000, %esp # imm = 0x3E8  ; X32-NEXT:    popl %esi  ; X32-NEXT:    popl %edi  ; X32-NEXT:    popl %ebx diff --git a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll index 66047e3677f..d011916f093 100644 --- a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll +++ b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -1,13 +1,10 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32 -; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64 - -; TODO: Reenable verify-machineinstrs once the if (!AXDead) // FIXME in -; X86InstrInfo::copyPhysReg() is resolved. 
+; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32
+; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64

 ; The peephole optimizer can elide some physical register copies such as
 ; EFLAGS. Make sure the flags are used directly, instead of needlessly using
-; lahf, when possible.
+; code to save and restore specific conditions.

 @L = external global i32
 @M = external global i8
@@ -209,29 +206,22 @@
 define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
 ; CHECK32-LABEL: test_intervening_call:
 ; CHECK32:       # %bb.0: # %entry
-; CHECK32-NEXT:    pushl %ebp
-; CHECK32-NEXT:    movl %esp, %ebp
 ; CHECK32-NEXT:    pushl %ebx
 ; CHECK32-NEXT:    pushl %esi
-; CHECK32-NEXT:    movl 12(%ebp), %eax
-; CHECK32-NEXT:    movl 16(%ebp), %edx
-; CHECK32-NEXT:    movl 20(%ebp), %ebx
-; CHECK32-NEXT:    movl 24(%ebp), %ecx
-; CHECK32-NEXT:    movl 8(%ebp), %esi
-; CHECK32-NEXT:    lock cmpxchg8b (%esi)
 ; CHECK32-NEXT:    pushl %eax
-; CHECK32-NEXT:    seto %al
-; CHECK32-NEXT:    lahf
-; CHECK32-NEXT:    movl %eax, %esi
-; CHECK32-NEXT:    popl %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK32-NEXT:    lock cmpxchg8b (%esi)
+; CHECK32-NEXT:    setne %bl
 ; CHECK32-NEXT:    subl $8, %esp
 ; CHECK32-NEXT:    pushl %edx
 ; CHECK32-NEXT:    pushl %eax
 ; CHECK32-NEXT:    calll bar
 ; CHECK32-NEXT:    addl $16, %esp
-; CHECK32-NEXT:    movl %esi, %eax
-; CHECK32-NEXT:    addb $127, %al
-; CHECK32-NEXT:    sahf
+; CHECK32-NEXT:    testb $-1, %bl
 ; CHECK32-NEXT:    jne .LBB4_3
 ; CHECK32-NEXT:  # %bb.1: # %t
 ; CHECK32-NEXT:    movl $42, %eax
@@ -240,39 +230,28 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
 ; CHECK32-NEXT:    xorl %eax, %eax
 ; CHECK32-NEXT:  .LBB4_2: # %t
 ; CHECK32-NEXT:    xorl %edx, %edx
+; CHECK32-NEXT:    addl $4, %esp
 ; CHECK32-NEXT:    popl %esi
 ; CHECK32-NEXT:    popl %ebx
-; CHECK32-NEXT:    popl %ebp
 ; CHECK32-NEXT:    retl
 ;
 ; CHECK64-LABEL: test_intervening_call:
 ; CHECK64:       # %bb.0: # %entry
-; CHECK64-NEXT:    pushq %rbp
-; CHECK64-NEXT:    movq %rsp, %rbp
 ; CHECK64-NEXT:    pushq %rbx
-; CHECK64-NEXT:    pushq %rax
 ; CHECK64-NEXT:    movq %rsi, %rax
 ; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi)
-; CHECK64-NEXT:    pushq %rax
-; CHECK64-NEXT:    seto %al
-; CHECK64-NEXT:    lahf
-; CHECK64-NEXT:    movq %rax, %rbx
-; CHECK64-NEXT:    popq %rax
+; CHECK64-NEXT:    setne %bl
 ; CHECK64-NEXT:    movq %rax, %rdi
 ; CHECK64-NEXT:    callq bar
-; CHECK64-NEXT:    movq %rbx, %rax
-; CHECK64-NEXT:    addb $127, %al
-; CHECK64-NEXT:    sahf
-; CHECK64-NEXT:    jne .LBB4_3
+; CHECK64-NEXT:    testb $-1, %bl
+; CHECK64-NEXT:    jne .LBB4_2
 ; CHECK64-NEXT:  # %bb.1: # %t
 ; CHECK64-NEXT:    movl $42, %eax
-; CHECK64-NEXT:    jmp .LBB4_2
-; CHECK64-NEXT:  .LBB4_3: # %f
+; CHECK64-NEXT:    popq %rbx
+; CHECK64-NEXT:    retq
+; CHECK64-NEXT:  .LBB4_2: # %f
 ; CHECK64-NEXT:    xorl %eax, %eax
-; CHECK64-NEXT:  .LBB4_2: # %t
-; CHECK64-NEXT:    addq $8, %rsp
 ; CHECK64-NEXT:    popq %rbx
-; CHECK64-NEXT:    popq %rbp
 ; CHECK64-NEXT:    retq
 entry:
   ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
@@ -293,32 +272,27 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6  ; CHECK32-LABEL: test_two_live_flags:  ; CHECK32:       # %bb.0: # %entry  ; CHECK32-NEXT:    pushl %ebp -; CHECK32-NEXT:    movl %esp, %ebp  ; CHECK32-NEXT:    pushl %ebx  ; CHECK32-NEXT:    pushl %edi  ; CHECK32-NEXT:    pushl %esi -; CHECK32-NEXT:    movl 44(%ebp), %edi -; CHECK32-NEXT:    movl 12(%ebp), %eax -; CHECK32-NEXT:    movl 16(%ebp), %edx -; CHECK32-NEXT:    movl 20(%ebp), %ebx -; CHECK32-NEXT:    movl 24(%ebp), %ecx -; CHECK32-NEXT:    movl 8(%ebp), %esi +; CHECK32-NEXT:    pushl %eax +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edi +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebp +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %edx +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi +; CHECK32-NEXT:    lock cmpxchg8b (%esi) +; CHECK32-NEXT:    setne {{[0-9]+}}(%esp) # 1-byte Folded Spill +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT:    movl %edi, %edx +; CHECK32-NEXT:    movl %ebp, %ecx +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %ebx +; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %esi  ; CHECK32-NEXT:    lock cmpxchg8b (%esi) -; CHECK32-NEXT:    seto %al -; CHECK32-NEXT:    lahf -; CHECK32-NEXT:    movl %eax, %esi -; CHECK32-NEXT:    movl 32(%ebp), %eax -; CHECK32-NEXT:    movl 36(%ebp), %edx -; CHECK32-NEXT:    movl %edi, %ecx -; CHECK32-NEXT:    movl 40(%ebp), %ebx -; CHECK32-NEXT:    movl 28(%ebp), %edi -; CHECK32-NEXT:    lock cmpxchg8b (%edi)  ; CHECK32-NEXT:    sete %al -; CHECK32-NEXT:    pushl %eax -; CHECK32-NEXT:    movl %esi, %eax -; CHECK32-NEXT:    addb $127, %al -; CHECK32-NEXT:    sahf -; CHECK32-NEXT:    popl %eax +; CHECK32-NEXT:    testb $-1, {{[0-9]+}}(%esp) # 1-byte Folded Reload  ; CHECK32-NEXT:    jne .LBB5_4  ; CHECK32-NEXT:  # %bb.1: # %entry  ; CHECK32-NEXT:    testb %al, %al @@ -330,6 +304,7 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6  ; CHECK32-NEXT:    xorl %eax, %eax  ; CHECK32-NEXT:  .LBB5_3: # %t  ; CHECK32-NEXT:    xorl %edx, %edx +; CHECK32-NEXT:    addl $4, %esp  ; CHECK32-NEXT:    popl %esi  ; CHECK32-NEXT:    popl %edi  ; CHECK32-NEXT:    popl %ebx @@ -338,32 +313,22 @@ define i64 @test_two_live_flags(i64* %foo0, i64 %bar0, i64 %baz0, i64* %foo1, i6  ;  ; CHECK64-LABEL: test_two_live_flags:  ; CHECK64:       # %bb.0: # %entry -; CHECK64-NEXT:    pushq %rbp -; CHECK64-NEXT:    movq %rsp, %rbp  ; CHECK64-NEXT:    movq %rsi, %rax  ; CHECK64-NEXT:    lock cmpxchgq %rdx, (%rdi) -; CHECK64-NEXT:    seto %al -; CHECK64-NEXT:    lahf -; CHECK64-NEXT:    movq %rax, %rdx +; CHECK64-NEXT:    setne %dl  ; CHECK64-NEXT:    movq %r8, %rax  ; CHECK64-NEXT:    lock cmpxchgq %r9, (%rcx)  ; CHECK64-NEXT:    sete %al -; CHECK64-NEXT:    pushq %rax -; CHECK64-NEXT:    movq %rdx, %rax -; CHECK64-NEXT:    addb $127, %al -; CHECK64-NEXT:    sahf -; CHECK64-NEXT:    popq %rax +; CHECK64-NEXT:    testb $-1, %dl  ; CHECK64-NEXT:    jne .LBB5_3  ; CHECK64-NEXT:  # %bb.1: # %entry  ; CHECK64-NEXT:    testb %al, %al  ; CHECK64-NEXT:    je .LBB5_3  ; CHECK64-NEXT:  # %bb.2: # %t  ; CHECK64-NEXT:    movl $42, %eax -; CHECK64-NEXT:    popq %rbp  ; CHECK64-NEXT:    retq  ; CHECK64-NEXT:  .LBB5_3: # %f  ; CHECK64-NEXT:    xorl %eax, %eax -; CHECK64-NEXT:    popq %rbp  ; CHECK64-NEXT:    retq  entry:    %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst diff --git 
a/llvm/test/CodeGen/X86/win64_frame.ll b/llvm/test/CodeGen/X86/win64_frame.ll index e8733472df0..49350a8edf8 100644 --- a/llvm/test/CodeGen/X86/win64_frame.ll +++ b/llvm/test/CodeGen/X86/win64_frame.ll @@ -224,71 +224,29 @@ entry:  declare i64 @dummy()  define i64 @f10(i64* %foo, i64 %bar, i64 %baz) { -; PUSHF-LABEL: f10: -; PUSHF:       # %bb.0: -; PUSHF-NEXT:    pushq %rbp -; PUSHF-NEXT:    .seh_pushreg 5 -; PUSHF-NEXT:    pushq %rsi -; PUSHF-NEXT:    .seh_pushreg 6 -; PUSHF-NEXT:    pushq %rdi -; PUSHF-NEXT:    .seh_pushreg 7 -; PUSHF-NEXT:    subq $32, %rsp -; PUSHF-NEXT:    .seh_stackalloc 32 -; PUSHF-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp -; PUSHF-NEXT:    .seh_setframe 5, 32 -; PUSHF-NEXT:    .seh_endprologue -; PUSHF-NEXT:    movq %rdx, %rsi -; PUSHF-NEXT:    movq %rdx, %rax -; PUSHF-NEXT:    lock cmpxchgq %r8, (%rcx) -; PUSHF-NEXT:    pushfq -; PUSHF-NEXT:    popq %rdi -; PUSHF-NEXT:    callq dummy -; PUSHF-NEXT:    pushq %rdi -; PUSHF-NEXT:    popfq -; PUSHF-NEXT:    cmovneq %rsi, %rax -; PUSHF-NEXT:    addq $32, %rsp -; PUSHF-NEXT:    popq %rdi -; PUSHF-NEXT:    popq %rsi -; PUSHF-NEXT:    popq %rbp -; PUSHF-NEXT:    retq -; PUSHF-NEXT:    .seh_handlerdata -; PUSHF-NEXT:    .text -; PUSHF-NEXT:    .seh_endproc -; -; SAHF-LABEL: f10: -; SAHF:       # %bb.0: -; SAHF-NEXT:    pushq %rbp -; SAHF-NEXT:    .seh_pushreg 5 -; SAHF-NEXT:    pushq %rsi -; SAHF-NEXT:    .seh_pushreg 6 -; SAHF-NEXT:    pushq %rdi -; SAHF-NEXT:    .seh_pushreg 7 -; SAHF-NEXT:    subq $32, %rsp -; SAHF-NEXT:    .seh_stackalloc 32 -; SAHF-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp -; SAHF-NEXT:    .seh_setframe 5, 32 -; SAHF-NEXT:    .seh_endprologue -; SAHF-NEXT:    movq %rdx, %rsi -; SAHF-NEXT:    movq %rdx, %rax -; SAHF-NEXT:    lock cmpxchgq %r8, (%rcx) -; SAHF-NEXT:    seto %al -; SAHF-NEXT:    lahf -; SAHF-NEXT:    movq %rax, %rdi -; SAHF-NEXT:    callq dummy -; SAHF-NEXT:    pushq %rax -; SAHF-NEXT:    movq %rdi, %rax -; SAHF-NEXT:    addb $127, %al -; SAHF-NEXT:    sahf -; SAHF-NEXT:    popq %rax -; SAHF-NEXT:    cmovneq %rsi, %rax -; SAHF-NEXT:    addq $32, %rsp -; SAHF-NEXT:    popq %rdi -; SAHF-NEXT:    popq %rsi -; SAHF-NEXT:    popq %rbp -; SAHF-NEXT:    retq -; SAHF-NEXT:    .seh_handlerdata -; SAHF-NEXT:    .text -; SAHF-NEXT:    .seh_endproc +; ALL-LABEL: f10: +; ALL:       # %bb.0: +; ALL-NEXT:    pushq %rsi +; ALL-NEXT:    .seh_pushreg 6 +; ALL-NEXT:    pushq %rbx +; ALL-NEXT:    .seh_pushreg 3 +; ALL-NEXT:    subq $40, %rsp +; ALL-NEXT:    .seh_stackalloc 40 +; ALL-NEXT:    .seh_endprologue +; ALL-NEXT:    movq %rdx, %rsi +; ALL-NEXT:    movq %rdx, %rax +; ALL-NEXT:    lock cmpxchgq %r8, (%rcx) +; ALL-NEXT:    sete %bl +; ALL-NEXT:    callq dummy +; ALL-NEXT:    testb $-1, %bl +; ALL-NEXT:    cmoveq %rsi, %rax +; ALL-NEXT:    addq $40, %rsp +; ALL-NEXT:    popq %rbx +; ALL-NEXT:    popq %rsi +; ALL-NEXT:    retq +; ALL-NEXT:    .seh_handlerdata +; ALL-NEXT:    .text +; ALL-NEXT:    .seh_endproc    %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst    %v = extractvalue { i64, i1 } %cx, 0    %p = extractvalue { i64, i1 } %cx, 1 diff --git a/llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll b/llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll index 49afb39b6d4..2a55dd41f69 100644 --- a/llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll +++ b/llvm/test/CodeGen/X86/x86-repmov-copy-eflags.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s  target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"  target triple = 
"i686-pc-windows-msvc18.0.0" @@ -39,15 +39,12 @@ declare void @g(%struct.T*)  ; CHECK:     leal 8(%esp), %esi  ; CHECK:     decl     (%esp) -; CHECK:     seto     %al -; CHECK:     lahf -; CHECK:     movl     %eax, %edi +; CHECK:     setne    %[[NE_REG:.*]]  ; CHECK:     pushl     %esi  ; CHECK:     calll     _g  ; CHECK:     addl     $4, %esp -; CHECK:     movl     %edi, %eax -; CHECK:     addb     $127, %al -; CHECK:     sahf +; CHECK:     testb    $-1, %[[NE_REG]] +; CHECK:     jne  attributes #0 = { nounwind optsize }  attributes #1 = { argmemonly nounwind }  | 

